From 33c35aa4817864e056fd772230b0c6b552e36ea2 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 15 May 2017 09:34:06 -0400 Subject: cgroup: Prevent kill_css() from being called more than once The kill_css() function may be called more than once under the condition that the css was killed but not physically removed yet followed by the removal of the cgroup that is hosting the css. This patch prevents any harmm from being done when that happens. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v4.5+ --- kernel/cgroup/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c3c9a0e1b3c9..8d4e85eae42c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_mutex); + if (css->flags & CSS_DYING) + return; + + css->flags |= CSS_DYING; + /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. -- cgit v1.2.3 From 04dc1b2fff4e96cb4142227fbdc63c8871ad4ed9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2017 17:48:50 +0200 Subject: futex,rt_mutex: Fix rt_mutex_cleanup_proxy_lock() Markus reported that the glibc/nptl/tst-robustpi8 test was failing after commit: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") The following trace shows the problem: ld-linux-x86-64-2161 [019] .... 410.760971: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_LOCK_PI ld-linux-x86-64-2161 [019] ...1 410.760972: lock_pi_update_atomic: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000875 ret=0 ld-linux-x86-64-2165 [011] .... 410.760978: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_UNLOCK_PI ld-linux-x86-64-2165 [011] d..1 410.760979: do_futex: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000871 ret=0 ld-linux-x86-64-2165 [011] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=0000 ld-linux-x86-64-2161 [019] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=ETIMEDOUT Task 2165 does an UNLOCK_PI, assigning the lock to the waiter task 2161 which then returns with -ETIMEDOUT. That wrecks the lock state, because now the owner isn't aware it acquired the lock and removes the pending robust list entry. If 2161 is killed, the robust list will not clear out this futex and the subsequent acquire on this futex will then (correctly) result in -ESRCH which is unexpected by glibc, triggers an internal assertion and dies. Task 2161 Task 2165 rt_mutex_wait_proxy_lock() timeout(); /* T2161 is still queued in the waiter list */ return -ETIMEDOUT; futex_unlock_pi() spin_lock(hb->lock); rtmutex_unlock() remove_rtmutex_waiter(T2161); mark_lock_available(); /* Make the next waiter owner of the user space side */ futex_uval = 2161; spin_unlock(hb->lock); spin_lock(hb->lock); rt_mutex_cleanup_proxy_lock() if (rtmutex_owner() !== current) ... return FAIL; .... return -ETIMEOUT; This means that rt_mutex_cleanup_proxy_lock() needs to call try_to_take_rt_mutex() so it can take over the rtmutex correctly which was assigned by the waker. If the rtmutex is owned by some other task then this call is harmless and just confirmes that the waiter is not able to acquire it. While there, fix what looks like a merge error which resulted in rt_mutex_cleanup_proxy_lock() having two calls to fixup_rt_mutex_waiters() and rt_mutex_wait_proxy_lock() not having any. Both should have one, since both potentially touch the waiter list. Fixes: 38d589f2fd08 ("futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()") Reported-by: Markus Trippelsdorf Bug-Spotted-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Florian Weimer Cc: Darren Hart Cc: Sebastian Andrzej Siewior Cc: Markus Trippelsdorf Link: http://lkml.kernel.org/r/20170519154850.mlomgdsd26drq5j6@hirez.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- kernel/locking/rtmutex.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b95509416909..28cd09e635ed 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1785,12 +1785,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, int ret; raw_spin_lock_irq(&lock->wait_lock); - - set_current_state(TASK_INTERRUPTIBLE); - /* sleep on the mutex */ + set_current_state(TASK_INTERRUPTIBLE); ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1821,16 +1823,26 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, bool cleanup = false; raw_spin_lock_irq(&lock->wait_lock); + /* + * Do an unconditional try-lock, this deals with the lock stealing + * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() + * sets a NULL owner. + * + * We're not interested in the return value, because the subsequent + * test on rt_mutex_owner() will infer that. If the trylock succeeded, + * we will own the lock and it will have removed the waiter. If we + * failed the trylock, we're still not owner and we need to remove + * ourselves. + */ + try_to_take_rt_mutex(lock, current, waiter); /* * Unless we're the owner; we're still enqueued on the wait_list. * So check if we became owner, if not, take us off the wait_list. */ if (rt_mutex_owner(lock) != current) { remove_waiter(lock, waiter); - fixup_rt_mutex_waiters(lock); cleanup = true; } - /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. -- cgit v1.2.3 From 4d6501dce079c1eb6bf0b1d8f528a5e81770109e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 9 May 2017 09:39:59 +0200 Subject: kthread: Fix use-after-free if kthread fork fails If a kthread forks (e.g. usermodehelper since commit 1da5c46fa965) but fails in copy_process() between calling dup_task_struct() and setting p->set_child_tid, then the value of p->set_child_tid will be inherited from the parent and get prematurely freed by free_kthread_struct(). kthread() - worker_thread() - process_one_work() | - call_usermodehelper_exec_work() | - kernel_thread() | - _do_fork() | - copy_process() | - dup_task_struct() | - arch_dup_task_struct() | - tsk->set_child_tid = current->set_child_tid // implied | - ... | - goto bad_fork_* | - ... | - free_task(tsk) | - free_kthread_struct(tsk) | - kfree(tsk->set_child_tid) - ... - schedule() - __schedule() - wq_worker_sleeping() - kthread_data(task)->flags // UAF The problem started showing up with commit 1da5c46fa965 since it reused ->set_child_tid for the kthread worker data. A better long-term solution might be to get rid of the ->set_child_tid abuse. The comment in set_kthread_struct() also looks slightly wrong. Debugged-by: Jamie Iles Fixes: 1da5c46fa965 ("kthread: Make struct kthread kmalloc'ed") Signed-off-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: Andy Lutomirski Cc: Frederic Weisbecker Cc: Jamie Iles Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170509073959.17858-1-vegard.nossum@oracle.com Signed-off-by: Thomas Gleixner --- kernel/fork.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index d681f8f10d2d..b7cdea10239c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1553,6 +1553,18 @@ static __latent_entropy struct task_struct *copy_process( if (!p) goto fork_out; + /* + * This _must_ happen before we call free_task(), i.e. before we jump + * to any of the bad_fork_* labels. This is to avoid freeing + * p->set_child_tid which is (ab)used as a kthread's data pointer for + * kernel threads (PF_KTHREAD). + */ + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; + /* + * Clear TID on mm_release()? + */ + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; + ftrace_graph_init_task(p); rt_mutex_init_task(p); @@ -1716,11 +1728,6 @@ static __latent_entropy struct task_struct *copy_process( } } - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; - /* - * Clear TID on mm_release()? - */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif -- cgit v1.2.3 From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 May 2017 15:40:12 -0500 Subject: ptrace: Properly initialize ptracer_cred on fork When I introduced ptracer_cred I failed to consider the weirdness of fork where the task_struct copies the old value by default. This winds up leaving ptracer_cred set even when a process forks and the child process does not wind up being ptraced. Because ptracer_cred is not set on non-ptraced processes whose parents were ptraced this has broken the ability of the enlightenment window manager to start setuid children. Fix this by properly initializing ptracer_cred in ptrace_init_task This must be done with a little bit of care to preserve the current value of ptracer_cred when ptrace carries through fork. Re-reading the ptracer_cred from the ptracing process at this point is inconsistent with how PT_PTRACE_CAP has been maintained all of these years. Tested-by: Takashi Iwai Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP") Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 7 +++++-- kernel/ptrace.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..ef3eb8bbfee4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 266ddcc1d8bb..60f356d91060 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, } +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, + const struct cred *ptracer_cred) +{ + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; + child->ptracer_cred = get_cred(ptracer_cred); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) +static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_entry)); - list_add(&child->ptrace_entry, &new_parent->ptraced); - child->parent = new_parent; rcu_read_lock(); - child->ptracer_cred = get_cred(__task_cred(new_parent)); + __ptrace_link(child, new_parent, __task_cred(new_parent)); rcu_read_unlock(); } @@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request, flags |= PT_SEIZED; task->ptrace = flags; - __ptrace_link(task, current); + ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) @@ -459,7 +465,7 @@ static int ptrace_traceme(void) */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; - __ptrace_link(current, current->real_parent); + ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From 43fe8b8eb81eee713400340716cf945f59d21496 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 May 2017 23:27:38 +0200 Subject: posix-timers: Make signal printks conditional A recent commit added extra printks for CPU/RT limits. This can result in excessive spam in dmesg. Make the printks conditional on print_fatal_signals. Fixes: e7ea7c9806a2 ("rlimits: Print more information when CPU/RT limits are exceeded") Reported-by: Dave Jones Signed-off-by: Thomas Gleixner Cc: Arun Raghavan --- kernel/time/posix-cpu-timers.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1370f067fb51..d2a1e6dd0291 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. */ - pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk, soft += USEC_PER_SEC; sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } - pr_info("RT Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } @@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. */ - pr_info("RT Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk, /* * At the soft limit, send a SIGXCPU every second. */ - pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); if (soft < hard) { soft++; -- cgit v1.2.3 From 41c25707d21716826e3c1f60967f5550610ec1c9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 May 2017 12:03:48 -0400 Subject: cpuset: consider dying css as offline In most cases, a cgroup controller don't care about the liftimes of cgroups. For the controller, a css becomes online when ->css_online() is called on it and offline when ->css_offline() is called. However, cpuset is special in that the user interface it exposes cares whether certain cgroups exist or not. Combined with the RCU delay between cgroup removal and css offlining, this can lead to user visible behavior oddities where operations which should succeed after cgroup removals fail for some time period. The effects of cgroup removals are delayed when seen from userland. This patch adds css_is_dying() which tests whether offline is pending and updates is_cpuset_online() so that the function returns false also while offline is pending. This gets rid of the userland visible delays. Signed-off-by: Tejun Heo Reported-by: Daniel Jordan Link: http://lkml.kernel.org/r/327ca1f5-7957-fbb9-9e5f-9ba149d40ba2@oracle.com Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 20 ++++++++++++++++++++ kernel/cgroup/cpuset.c | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed2573e149fa..710a005c6b7a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -343,6 +343,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css) return true; } +/** + * css_is_dying - test whether the specified css is dying + * @css: target css + * + * Test whether @css is in the process of offlining or already offline. In + * most cases, ->css_online() and ->css_offline() callbacks should be + * enough; however, the actual offline operations are RCU delayed and this + * test returns %true also when @css is scheduled to be offlined. + * + * This is useful, for example, when the use case requires synchronous + * behavior with respect to cgroup removal. cgroup removal schedules css + * offlining but the css can seem alive while the operation is being + * delayed. If the delay affects user visible semantics, this test can be + * used to resolve the situation. + */ +static inline bool css_is_dying(struct cgroup_subsys_state *css) +{ + return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); +} + /** * css_put - put a css reference * @css: target css diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f6501f4f6040..ae643412948a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -176,9 +176,9 @@ typedef enum { } cpuset_flagbits_t; /* convenient tests for these bits */ -static inline bool is_cpuset_online(const struct cpuset *cs) +static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags); + return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) -- cgit v1.2.3 From 1ad2f5838d345e1c102bd1cd27c4f4c1349b0dc8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:05 +0200 Subject: bpf: fix incorrect pruning decision when alignment must be tracked Currently, when we enforce alignment tracking on direct packet access, the verifier lets the following program pass despite doing a packet write with unaligned access: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (bf) r0 = r2 4: (07) r0 += 14 5: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 6: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 7: (63) *(u32 *)(r0 -4) = r0 8: (b7) r0 = 0 9: (95) exit from 6 to 8: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R10=fp 8: (b7) r0 = 0 9: (95) exit from 5 to 10: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R10=fp 10: (07) r0 += 1 11: (05) goto pc-6 6: safe <----- here, wrongly found safe processed 15 insns However, if we enforce a pruning mismatch by adding state into r8 which is then being mismatched in states_equal(), we find that for the otherwise same program, the verifier detects a misaligned packet access when actually walking that path: 0: (61) r2 = *(u32 *)(r1 +76) 1: (61) r3 = *(u32 *)(r1 +80) 2: (61) r7 = *(u32 *)(r1 +8) 3: (b7) r8 = 1 4: (bf) r0 = r2 5: (07) r0 += 14 6: (25) if r7 > 0x1 goto pc+4 R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=14,r=14) R1=ctx R2=pkt(id=0,off=0,r=14) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 9: (b7) r0 = 0 10: (95) exit from 7 to 9: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=0,max_value=1 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 9: (b7) r0 = 0 10: (95) exit from 6 to 11: R0=pkt(id=0,off=14,r=0) R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R7=inv,min_value=2 R8=imm1,min_value=1,max_value=1,min_align=1 R10=fp 11: (07) r0 += 1 12: (b7) r8 = 0 13: (05) goto pc-7 <----- mismatch due to r8 7: (2d) if r0 > r3 goto pc+1 R0=pkt(id=0,off=15,r=15) R1=ctx R2=pkt(id=0,off=0,r=15) R3=pkt_end R7=inv,min_value=2 R8=imm0,min_value=0,max_value=0,min_align=2147483648 R10=fp 8: (63) *(u32 *)(r0 -4) = r0 misaligned packet access off 2+15+-4 size 4 The reason why we fail to see it in states_equal() is that the third test in compare_ptrs_to_packet() ... if (old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; ... will let the above pass. The situation we run into is that old->off <= cur->off (14 <= 15), meaning that prior walked paths went with smaller offset, which was later used in the packet access after successful packet range check and found to be safe already. For example: Given is R0=pkt(id=0,off=0,r=0). Adding offset 14 as in above program to it, results in R0=pkt(id=0,off=14,r=0) before the packet range test. Now, testing this against R3=pkt_end with 'if r0 > r3 goto out' will transform R0 into R0=pkt(id=0,off=14,r=14) for the case when we're within bounds. A write into the packet at offset *(u32 *)(r0 -4), that is, 2 + 14 -4, is valid and aligned (2 is for NET_IP_ALIGN). After processing this with all fall-through paths, we later on check paths from branches. When the above skb->mark test is true, then we jump near the end of the program, perform r0 += 1, and jump back to the 'if r0 > r3 goto out' test we've visited earlier already. This time, R0 is of type R0=pkt(id=0,off=15,r=0), and we'll prune that part because this time we'll have a larger safe packet range, and we already found that with off=14 all further insn were already safe, so it's safe as well with a larger off. However, the problem is that the subsequent write into the packet with 2 + 15 -4 is then unaligned, and not caught by the alignment tracking. Note that min_align, aux_off, and aux_off_align were all 0 in this example. Since we cannot tell at this time what kind of packet access was performed in the prior walk and what minimal requirements it has (we might do so in the future, but that requires more complexity), fix it to disable this pruning case for strict alignment for now, and let the verifier do check such paths instead. With that applied, the test cases pass and reject the program due to misalignment. Fixes: d1174416747d ("bpf: Track alignment of register values in the verifier.") Reference: http://patchwork.ozlabs.org/patch/761909/ Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c72cd41f5b8b..e37e06b1229d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -843,9 +843,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, { bool strict = env->strict_alignment; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - strict = true; - switch (reg->type) { case PTR_TO_PACKET: return check_pkt_ptr_alignment(reg, off, size, strict); @@ -2696,7 +2693,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct bpf_reg_state *old, +static bool compare_ptrs_to_packet(struct bpf_verifier_env *env, + struct bpf_reg_state *old, struct bpf_reg_state *cur) { if (old->id != cur->id) @@ -2739,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * 'if (R4 > data_end)' and all further insn were already good with r=20, * so they will be good with r=30 and we can prune the search. */ - if (old->off <= cur->off && + if (!env->strict_alignment && old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; @@ -2810,7 +2808,7 @@ static bool states_equal(struct bpf_verifier_env *env, continue; if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && - compare_ptrs_to_packet(rold, rcur)) + compare_ptrs_to_packet(env, rold, rcur)) continue; return false; @@ -3588,10 +3586,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } - if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - else - env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3697,7 +3695,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), -- cgit v1.2.3 From a9789ef9afcb4fb0193f8dd94f2665ba3ad71e79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:06 +0200 Subject: bpf: properly reset caller saved regs after helper call and ld_abs/ind Currently, after performing helper calls, we clear all caller saved registers, that is r0 - r5 and fill r0 depending on struct bpf_func_proto specification. The way we reset these regs can affect pruning decisions in later paths, since we only reset register's imm to 0 and type to NOT_INIT. However, we leave out clearing of other variables such as id, min_value, max_value, etc, which can later on lead to pruning mismatches due to stale data. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e37e06b1229d..339c8a1371de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + + memset(®s[regno], 0, sizeof(regs[regno])); + regs[regno].type = NOT_INIT; + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) { - regs[i].type = NOT_INIT; - regs[i].imm = 0; - regs[i].min_value = BPF_REGISTER_MIN_RANGE; - regs[i].max_value = BPF_REGISTER_MAX_RANGE; - regs[i].min_align = 0; - regs[i].aux_off = 0; - regs[i].aux_off_align = 0; - } + for (i = 0; i < MAX_BPF_REG; i++) + mark_reg_not_init(regs, i); /* frame pointer */ regs[BPF_REG_FP].type = FRAME_PTR; @@ -1346,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs = state->regs; - struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1413,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* update return register */ if (fn->ret_type == RET_INTEGER) { @@ -2445,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -2478,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* mark destination R0 register as readable, since it contains * the value fetched from the packet -- cgit v1.2.3 From a316338cb71a3260201490e615f2f6d5c0d8fb2c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:08 +0200 Subject: bpf: fix wrong exposure of map_flags into fdinfo for lpm trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via attr->map_flags, since it does not support preallocation yet. We check the flag, but we never copy the flag into trie->map.map_flags, which is later on exposed into fdinfo and used by loaders such as iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test whether a pinned map has the same spec as the one from the BPF obj file and if not, bails out, which is currently the case for lpm since it exposes always 0 as flags. Also copy over flags in array_map_alloc() and stack_map_alloc(). They always have to be 0 right now, but we should make sure to not miss to copy them over at a later point in time when we add actual flags for them to use. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Jarno Rajahalme Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 1 + kernel/bpf/lpm_trie.c | 1 + kernel/bpf/stackmap.c | 1 + 3 files changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5e00b2333c26..172dc8ee0e3b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 39cfafd895b8..b09185f0f17d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 4dfd6f2ec2f9..31147d730abf 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; -- cgit v1.2.3 From 5720acf4bfc142ba568d5b6782fceaf62ed15e0b Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Fri, 26 May 2017 14:45:21 +0200 Subject: livepatch: Make livepatch dependent on !TRIM_UNUSED_KSYMS If TRIM_UNUSED_KSYMS is enabled, all unneeded exported symbols are made unexported. Two-pass build of the kernel is done to find out which symbols are needed based on a configuration. This effectively complicates things for out-of-tree modules. Livepatch exports functions to (un)register and enable/disable a live patch. The only in-tree module which uses these functions is a sample in samples/livepatch/. If the sample is disabled, the functions are trimmed and out-of-tree live patches cannot be built. Note that live patches are intended to be built out-of-tree. Suggested-by: Michal Marek Acked-by: Josh Poimboeuf Acked-by: Jessica Yu Signed-off-by: Miroslav Benes Signed-off-by: Jiri Kosina --- kernel/livepatch/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 045022557936..ec4565122e65 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -10,6 +10,7 @@ config LIVEPATCH depends on SYSFS depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH + depends on !TRIM_UNUSED_KSYMS help Say Y here if you want to support kernel live patching. This option has no runtime impact until a kernel "patch" -- cgit v1.2.3 From f9797c2f20c0160edd718aa467101f3301e57e59 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 25 May 2017 16:20:38 +0100 Subject: ftrace: Fix memory leak in ftrace_graph_release() ftrace_hash is being kfree'ed in ftrace_graph_release(), however the ->buckets field is not. This results in a memory leak that is easily captured by kmemleak: unreferenced object 0xffff880038afe000 (size 8192): comm "trace-cmd", pid 238, jiffies 4294916898 (age 9.736s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc+0x12d/0x1a0 [] alloc_ftrace_hash+0x51/0x80 [] __ftrace_graph_open.isra.39.constprop.46+0xa3/0x100 [] ftrace_graph_open+0x68/0xa0 [] do_dentry_open.isra.1+0x1bd/0x2d0 [] vfs_open+0x47/0x60 [] path_openat+0x2a5/0x1020 [] do_filp_open+0x8a/0xf0 [] do_sys_open+0x12f/0x200 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x13/0x94 [] 0xffffffffffffffff Link: http://lkml.kernel.org/r/20170525152038.7661-1-lhenriques@suse.com Cc: stable@vger.kernel.org Fixes: b9b0c831bed2 ("ftrace: Convert graph filter to use hash tables") Signed-off-by: Luis Henriques Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 74fdfe9ed3db..9e5841dc14b5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5063,7 +5063,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) } out: - kfree(fgd->new_hash); + free_ftrace_hash(fgd->new_hash); kfree(fgd); return ret; -- cgit v1.2.3 From c93f5cf571e7795f97d49ef51b766cf25e328545 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 May 2017 19:38:17 +0900 Subject: kprobes/x86: Fix to set RWX bits correctly before releasing trampoline Fix kprobes to set(recover) RWX bits correctly on trampoline buffer before releasing it. Releasing readonly page to module_memfree() crash the kernel. Without this fix, if kprobes user register a bunch of kprobes in function body (since kprobes on function entry usually use ftrace) and unregister it, kernel hits a BUG and crash. Link: http://lkml.kernel.org/r/149570868652.3518.14120169373590420503.stgit@devbox Signed-off-by: Masami Hiramatsu Fixes: d0381c81c2f7 ("kprobes/x86: Set kprobes pages read-only") Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 9 +++++++++ kernel/kprobes.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 5b2bbfbb3712..6b877807598b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, struct insn *insn) } } +/* Recover page to RW mode before releasing it */ +void free_insn_page(void *page) +{ + set_memory_nx((unsigned long)page & PAGE_MASK, 1); + set_memory_rw((unsigned long)page & PAGE_MASK, 1); + module_memfree(page); +} + static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2d2d3a568e4e..adfe3b4cfe05 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -122,7 +122,7 @@ static void *alloc_insn_page(void) return module_alloc(PAGE_SIZE); } -static void free_insn_page(void *page) +void __weak free_insn_page(void *page) { module_memfree(page); } -- cgit v1.2.3 From 40da1b11f01e43aad1aa6cea64681b6125e8a2a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 2 Jun 2017 16:27:14 +0200 Subject: cpu/hotplug: Drop the device lock on error If a custom CPU target is specified and that one is not available _or_ can't be interrupted then the code returns to userland without dropping a lock as notices by lockdep: |echo 133 > /sys/devices/system/cpu/cpu7/hotplug/target | ================================================ | [ BUG: lock held when returning to user space! ] | ------------------------------------------------ | bash/503 is leaving the kernel with locks still held! | 1 lock held by bash/503: | #0: (device_hotplug_lock){+.+...}, at: [] lock_device_hotplug_sysfs+0x10/0x40 So release the lock then. Fixes: 757c989b9994 ("cpu/hotplug: Make target state writeable") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170602142714.3ogo25f2wbq6fjpj@linutronix.de --- kernel/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 9ae6fbe5b5cf..cb5103413bd8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev, ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) - return ret; + goto out; if (st->state < target) ret = do_cpu_up(dev->id, target); else ret = do_cpu_down(dev->id, target); - +out: unlock_device_hotplug(); return ret ? ret : count; } -- cgit v1.2.3 From f4781e76f90df7aec400635d73ea4c35ee1d4765 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:34 +0200 Subject: alarmtimer: Prevent overflow of relative timers Andrey reported a alartimer related RCU stall while fuzzing the kernel with syzkaller. The reason for this is an overflow in ktime_add() which brings the resulting time into negative space and causes immediate expiry of the timer. The following rearm with a small interval does not bring the timer back into positive space due to the same issue. This results in a permanent firing alarmtimer which hogs the CPU. Use ktime_add_safe() instead which detects the overflow and clamps the result to KTIME_SEC_MAX. Reported-by: Andrey Konovalov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Kostya Serebryany Cc: syzkaller Cc: John Stultz Cc: Dmitry Vyukov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170530211655.802921648@linutronix.de --- kernel/time/alarmtimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 5cb5b0008d97..2b2e032b4eb4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add(start, base->gettime()); + start = ktime_add_safe(start, base->gettime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) overrun++; } - alarm->node.expires = ktime_add(alarm->node.expires, interval); + alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); @@ -666,7 +666,7 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, ktime_t now; now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); - exp = ktime_add(now, exp); + exp = ktime_add_safe(now, exp); } alarm_start(&timr->it.alarm.alarmtimer, exp); -- cgit v1.2.3 From ff86bf0c65f14346bf2440534f9ba5ac232c39a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:35 +0200 Subject: alarmtimer: Rate limit periodic intervals The alarmtimer code has another source of potentially rearming itself too fast. Interval timers with a very samll interval have a similar CPU hog effect as the previously fixed overflow issue. The reason is that alarmtimers do not implement the normal protection against this kind of problem which the other posix timer use: timer expires -> queue signal -> deliver signal -> rearm timer This scheme brings the rearming under scheduler control and prevents permanently firing timers which hog the CPU. Bringing this scheme to the alarm timer code is a major overhaul because it lacks all the necessary mechanisms completely. So for a quick fix limit the interval to one jiffie. This is not problematic in practice as alarmtimers are usually backed by an RTC for suspend which have 1 second resolution. It could be therefor argued that the resolution of this clock should be set to 1 second in general, but that's outside the scope of this fix. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Kostya Serebryany Cc: syzkaller Cc: John Stultz Cc: Dmitry Vyukov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170530211655.896767100@linutronix.de --- kernel/time/alarmtimer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2b2e032b4eb4..ee2f4202d82a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -660,6 +660,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval); + + /* + * Rate limit to the tick as a hot fix to prevent DOS. Will be + * mopped up later. + */ + if (timr->it.alarm.interval < TICK_NSEC) + timr->it.alarm.interval = TICK_NSEC; + exp = timespec64_to_ktime(new_setting->it_value); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { -- cgit v1.2.3 From f3b7eaae1b35eb8077610eb7c7db042c9b0645e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jun 2017 00:57:37 +0200 Subject: Revert "ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle" Revert commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) as it turned out to be premature and triggered a number of different issues on various systems. That includes, but is not limited to, premature suspend-to-RAM aborts on Dell XPS 13 (9343) reported by Dominik. The issue the commit in question attempted to address is real and will need to be taken care of going forward, but evidently more work is needed for this purpose. Reported-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- drivers/acpi/battery.c | 2 +- drivers/acpi/button.c | 5 ++--- drivers/acpi/device_pm.c | 3 +-- drivers/acpi/sleep.c | 28 ---------------------------- drivers/base/power/main.c | 5 +++++ drivers/base/power/wakeup.c | 18 ++++++------------ include/linux/suspend.h | 7 ++----- kernel/power/process.c | 2 +- kernel/power/suspend.c | 29 ++++------------------------- 9 files changed, 22 insertions(+), 77 deletions(-) (limited to 'kernel') diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 83ab17e4a795..4ef1e4624b2b 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -776,7 +776,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume) if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) || (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) && (battery->capacity_now <= battery->alarm))) - pm_wakeup_hard_event(&battery->device->dev); + pm_wakeup_event(&battery->device->dev, 0); return result; } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index b7c2a06963d6..668137e4a069 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -216,7 +216,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state) } if (state) - pm_wakeup_hard_event(&device->dev); + pm_wakeup_event(&device->dev, 0); ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); if (ret == NOTIFY_DONE) @@ -398,7 +398,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) } else { int keycode; - pm_wakeup_hard_event(&device->dev); + pm_wakeup_event(&device->dev, 0); if (button->suspended) break; @@ -530,7 +530,6 @@ static int acpi_button_add(struct acpi_device *device) lid_device = device; } - device_init_wakeup(&device->dev, true); printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device)); return 0; diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 798d5003a039..993fd31394c8 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "internal.h" @@ -400,7 +399,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) mutex_lock(&acpi_pm_notifier_lock); if (adev->wakeup.flags.notifier_present) { - pm_wakeup_ws_event(adev->wakeup.ws, 0, true); + __pm_wakeup_event(adev->wakeup.ws, 0); if (adev->wakeup.context.work.func) queue_pm_work(&adev->wakeup.context.work); } diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e84005d642e6..a4327af676fe 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -662,40 +662,14 @@ static int acpi_freeze_prepare(void) acpi_os_wait_events_complete(); if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); - return 0; } -static void acpi_freeze_wake(void) -{ - /* - * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means - * that the SCI has triggered while suspended, so cancel the wakeup in - * case it has not been a wakeup event (the GPEs will be checked later). - */ - if (acpi_sci_irq_valid() && - !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) - pm_system_cancel_wakeup(); -} - -static void acpi_freeze_sync(void) -{ - /* - * Process all pending events in case there are any wakeup ones. - * - * The EC driver uses the system workqueue, so that one needs to be - * flushed too. - */ - acpi_os_wait_events_complete(); - flush_scheduled_work(); -} - static void acpi_freeze_restore(void) { acpi_disable_wakeup_devices(ACPI_STATE_S0); if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); - acpi_enable_all_runtime_gpes(); } @@ -707,8 +681,6 @@ static void acpi_freeze_end(void) static const struct platform_freeze_ops acpi_freeze_ops = { .begin = acpi_freeze_begin, .prepare = acpi_freeze_prepare, - .wake = acpi_freeze_wake, - .sync = acpi_freeze_sync, .restore = acpi_freeze_restore, .end = acpi_freeze_end, }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e987a6f55d36..9faee1c893e5 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1091,6 +1091,11 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (async_error) goto Complete; + if (pm_wakeup_pending()) { + async_error = -EBUSY; + goto Complete; + } + if (dev->power.syscore || dev->power.direct_complete) goto Complete; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 9c36b27996fc..c313b600d356 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ unsigned int pm_wakeup_irq __read_mostly; -/* If greater than 0 and the system is suspending, terminate the suspend. */ -static atomic_t pm_abort_suspend __read_mostly; +/* If set and the system is suspending, terminate the suspend. */ +static bool pm_abort_suspend __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -855,26 +855,20 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret || atomic_read(&pm_abort_suspend) > 0; + return ret || pm_abort_suspend; } void pm_system_wakeup(void) { - atomic_inc(&pm_abort_suspend); + pm_abort_suspend = true; freeze_wake(); } EXPORT_SYMBOL_GPL(pm_system_wakeup); -void pm_system_cancel_wakeup(void) -{ - atomic_dec(&pm_abort_suspend); -} - -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(void) { + pm_abort_suspend = false; pm_wakeup_irq = 0; - if (reset) - atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0b1cf32edfd7..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -189,8 +189,6 @@ struct platform_suspend_ops { struct platform_freeze_ops { int (*begin)(void); int (*prepare)(void); - void (*wake)(void); - void (*sync)(void); void (*restore)(void); void (*end)(void); }; @@ -430,8 +428,7 @@ extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(void); extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); @@ -481,7 +478,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} -static inline void pm_wakeup_clear(bool reset) {} +static inline void pm_wakeup_clear(void) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} diff --git a/kernel/power/process.c b/kernel/power/process.c index 78672d324a6e..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,7 +132,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c0248c74d6d4..15e6baef5c73 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -72,8 +72,6 @@ static void freeze_begin(void) static void freeze_enter(void) { - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true); - spin_lock_irq(&suspend_freeze_lock); if (pm_wakeup_pending()) goto out; @@ -100,27 +98,6 @@ static void freeze_enter(void) out: suspend_freeze_state = FREEZE_STATE_NONE; spin_unlock_irq(&suspend_freeze_lock); - - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false); -} - -static void s2idle_loop(void) -{ - do { - freeze_enter(); - - if (freeze_ops && freeze_ops->wake) - freeze_ops->wake(); - - dpm_resume_noirq(PMSG_RESUME); - if (freeze_ops && freeze_ops->sync) - freeze_ops->sync(); - - if (pm_wakeup_pending()) - break; - - pm_wakeup_clear(false); - } while (!dpm_suspend_noirq(PMSG_SUSPEND)); } void freeze_wake(void) @@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) * all the devices are suspended. */ if (state == PM_SUSPEND_FREEZE) { - s2idle_loop(); - goto Platform_early_resume; + trace_suspend_resume(TPS("machine_suspend"), state, true); + freeze_enter(); + trace_suspend_resume(TPS("machine_suspend"), state, false); + goto Platform_wake; } error = disable_nonboot_cpus(); -- cgit v1.2.3 From cc1582c231ea041fbc68861dfaf957eaf902b829 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 25 May 2017 18:09:07 +0800 Subject: perf/core: Drop kernel samples even though :u is specified When doing sampling, for example: perf record -e cycles:u ... On workloads that do a lot of kernel entry/exits we see kernel samples, even though :u is specified. This is due to skid existing. This might be a security issue because it can leak kernel addresses even though kernel sampling support is disabled. The patch drops the kernel samples if exclude_kernel is specified. For example, test on Haswell desktop: perf record -e cycles:u perf report --stdio Before patch applied: 99.77% mgen mgen [.] buf_read 0.20% mgen mgen [.] rand_buf_init 0.01% mgen [kernel.vmlinux] [k] apic_timer_interrupt 0.00% mgen mgen [.] last_free_elem 0.00% mgen libc-2.23.so [.] __random_r 0.00% mgen libc-2.23.so [.] _int_malloc 0.00% mgen mgen [.] rand_array_init 0.00% mgen [kernel.vmlinux] [k] page_fault 0.00% mgen libc-2.23.so [.] __random 0.00% mgen libc-2.23.so [.] __strcasestr 0.00% mgen ld-2.23.so [.] strcmp 0.00% mgen ld-2.23.so [.] _dl_start 0.00% mgen libc-2.23.so [.] sched_setaffinity@@GLIBC_2.3.4 0.00% mgen ld-2.23.so [.] _start We can see kernel symbols apic_timer_interrupt and page_fault. After patch applied: 99.79% mgen mgen [.] buf_read 0.19% mgen mgen [.] rand_buf_init 0.00% mgen libc-2.23.so [.] __random_r 0.00% mgen mgen [.] rand_array_init 0.00% mgen mgen [.] last_free_elem 0.00% mgen libc-2.23.so [.] vfprintf 0.00% mgen libc-2.23.so [.] rand 0.00% mgen libc-2.23.so [.] __random 0.00% mgen libc-2.23.so [.] _int_malloc 0.00% mgen libc-2.23.so [.] _IO_doallocbuf 0.00% mgen ld-2.23.so [.] do_lookup_x 0.00% mgen ld-2.23.so [.] open_verify.constprop.7 0.00% mgen ld-2.23.so [.] _dl_important_hwcaps 0.00% mgen libc-2.23.so [.] sched_setaffinity@@GLIBC_2.3.4 0.00% mgen ld-2.23.so [.] _start There are only userspace symbols. Signed-off-by: Jin Yao Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Cc: kan.liang@intel.com Cc: mark.rutland@arm.com Cc: will.deacon@arm.com Cc: yao.jin@intel.com Link: http://lkml.kernel.org/r/1495706947-3744-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 6e75a5c9412d..6c4e523dc1e2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + * To avoid leaking information to userspace, we must always + * reject kernel samples when exclude_kernel is set. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -7336,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); + /* + * For security, drop the skid kernel samples if necessary. + */ + if (!sample_is_allowed(event, regs)) + return ret; + /* * XXX event_limit might not quite work as expected on inherited * events -- cgit v1.2.3 From dac8bbbae1d0ccba96402d25deeed3a2e87992c6 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 8 Jun 2017 12:01:30 +0200 Subject: Revert "printk: fix double printing with earlycon" This reverts commit cf39bf58afdaabc0b86f141630fb3fd18190294e. The commit regression to users that define both console=ttyS1 and console=ttyS0 on the command line, see https://lkml.kernel.org/r/20170509082915.GA13236@bistromath.localdomain The kernel log messages always appeared only on one serial port. It is even documented in Documentation/admin-guide/serial-console.rst: "Note that you can only define one console per device type (serial, video)." The above mentioned commit changed the order in which the command line parameters are searched. As a result, the kernel log messages go to the last mentioned ttyS* instead of the first one. We long thought that using two console=ttyS* on the command line did not make sense. But then we realized that console= parameters were handled also by systemd, see http://0pointer.de/blog/projects/serial-console.html "By default systemd will instantiate one serial-getty@.service on the main kernel console, if it is not a virtual terminal." where "[4] If multiple kernel consoles are used simultaneously, the main console is the one listed first in /sys/class/tty/console/active, which is the last one listed on the kernel command line." This puts the original report into another light. The system is running in qemu. The first serial port is used to store the messages into a file. The second one is used to login to the system via a socket. It depends on systemd and the historic kernel behavior. By other words, systemd causes that it makes sense to define both console=ttyS1 console=ttyS0 on the command line. The kernel fix caused regression related to userspace (systemd) and need to be reverted. In addition, it went out that the fix helped only partially. The messages still were duplicated when the boot console was removed early by late_initcall(printk_late_init). Then the entire log was replayed when the same console was registered as a normal one. Link: 20170606160339.GC7604@pathway.suse.cz Cc: Aleksey Makarov Cc: Sabrina Dubroca Cc: Sudeep Holla Cc: Greg Kroah-Hartman Cc: Peter Hurley Cc: Jiri Slaby Cc: Robin Murphy , Cc: Steven Rostedt Cc: "Nair, Jayachandran" Cc: linux-serial@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reported-by: Sabrina Dubroca Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 46 ++++++++++------------------------------------ 1 file changed, 10 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 779479ac9f57..9dbceb76e6bc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -269,7 +269,6 @@ static struct console *exclusive_console; #define MAX_CMDLINECONSOLES 8 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; -static int console_cmdline_cnt; static int preferred_console = -1; int console_set_on_cmdline; @@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options, * See if this tty is not yet registered, and * if we have a slot free. */ - for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) { + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { if (strcmp(c->name, name) == 0 && c->index == idx) { - if (brl_options) - return 0; - - /* - * Maintain an invariant that will help to find if - * the matching console is preferred, see - * register_console(): - * - * The last non-braille console is always - * the preferred one. - */ - if (i != console_cmdline_cnt - 1) - swap(console_cmdline[i], - console_cmdline[console_cmdline_cnt - 1]); - - preferred_console = console_cmdline_cnt - 1; - + if (!brl_options) + preferred_console = i; return 0; } } @@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options, braille_set_options(c, brl_options); c->index = idx; - console_cmdline_cnt++; return 0; } /* @@ -2477,23 +2462,12 @@ void register_console(struct console *newcon) } /* - * See if this console matches one we selected on the command line. - * - * There may be several entries in the console_cmdline array matching - * with the same console, one with newcon->match(), another by - * name/index: - * - * pl011,mmio,0x87e024000000,115200 -- added from SPCR - * ttyAMA0 -- added from command line - * - * Traverse the console_cmdline array in reverse order to be - * sure that if this console is preferred then it will be the first - * matching entry. We use the invariant that is maintained in - * __add_preferred_console(). + * See if this console matches one we selected on + * the command line. */ - for (i = console_cmdline_cnt - 1; i >= 0; i--) { - c = console_cmdline + i; - + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { if (!newcon->match || newcon->match(newcon, c->name, c->index, c->options) != 0) { /* default matching */ -- cgit v1.2.3 From cdf7abc4610a7f1c43d06cda246c5f748a4fd267 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 31 May 2017 14:03:10 +0200 Subject: srcu: Allow use of Tiny/Tree SRCU from both process and interrupt context Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting down a guest running iperf on a VFIO assigned device. This happens because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt context, while a worker thread does the same inside kvm_set_irq(). If the interrupt happens while the worker thread is executing __srcu_read_lock(), updates to the Classic SRCU ->lock_count[] field or the Tree SRCU ->srcu_lock_count[] field can be lost. The docs say you are not supposed to call srcu_read_lock() and srcu_read_unlock() from irq context, but KVM interrupt injection happens from (host) interrupt context and it would be nice if SRCU supported the use case. KVM is using SRCU here not really for the "sleepable" part, but rather due to its IPI-free fast detection of grace periods. It is therefore not desirable to switch back to RCU, which would effectively revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING", 2014-01-16). However, the docs are overly conservative. You can have an SRCU instance only has users in irq context, and you can mix process and irq context as long as process context users disable interrupts. In addition, __srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and Classic SRCU. For those two implementations, only srcu_read_lock() is unsafe. When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(), in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments. Therefore it kept __this_cpu_inc(), with preempt_disable/enable in the caller. Tree SRCU however only does one increment, so on most architectures it is more efficient for __srcu_read_lock() to use this_cpu_inc(), and any performance differences appear to be down in the noise. Unlike Classic and Tree SRCU, Tiny SRCU does increments and decrements on a single variable. Therefore, as Peter Zijlstra pointed out, Tiny SRCU's implementation already supports mixed-context use of srcu_read_lock() and srcu_read_unlock(), at least as long as uses of srcu_read_lock() and srcu_read_unlock() in each handler are nested and paired properly. In other words, it is still illegal to (say) invoke srcu_read_lock() in an interrupt handler and to invoke the matching srcu_read_unlock() in a softirq handler. Therefore, the only change required for Tiny SRCU is to its comments. Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING") Reported-by: Linu Cherian Suggested-by: Linu Cherian Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Cc: Linus Torvalds Signed-off-by: Paul E. McKenney Tested-by: Paolo Bonzini --- kernel/rcu/srcutiny.c | 7 ++++--- kernel/rcu/srcutree.c | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 36e1f82faed1..32798eb14853 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -97,8 +97,9 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. - * Returns an index that must be passed to the matching srcu_read_unlock(). + * srcu_struct. Can be invoked from irq/bh handlers, but the matching + * __srcu_read_unlock() must be in the same handler instance. Returns an + * index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) { @@ -112,7 +113,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); /* * Removes the count for the old reader from the appropriate element of - * the srcu_struct. Must be called from process context. + * the srcu_struct. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 3ae8474557df..157654fa436a 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -357,7 +357,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -365,7 +365,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->srcu_idx) & 0x1; - __this_cpu_inc(sp->sda->srcu_lock_count[idx]); + this_cpu_inc(sp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -375,7 +375,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { -- cgit v1.2.3 From 1123a6041654e8f889014659593bad4168e542c2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 31 May 2017 14:03:11 +0200 Subject: srcu: Allow use of Classic SRCU from both process and interrupt context Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting down a guest running iperf on a VFIO assigned device. This happens because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt context, while a worker thread does the same inside kvm_set_irq(). If the interrupt happens while the worker thread is executing __srcu_read_lock(), updates to the Classic SRCU ->lock_count[] field or the Tree SRCU ->srcu_lock_count[] field can be lost. The docs say you are not supposed to call srcu_read_lock() and srcu_read_unlock() from irq context, but KVM interrupt injection happens from (host) interrupt context and it would be nice if SRCU supported the use case. KVM is using SRCU here not really for the "sleepable" part, but rather due to its IPI-free fast detection of grace periods. It is therefore not desirable to switch back to RCU, which would effectively revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING", 2014-01-16). However, the docs are overly conservative. You can have an SRCU instance only has users in irq context, and you can mix process and irq context as long as process context users disable interrupts. In addition, __srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and Classic SRCU. For those two implementations, only srcu_read_lock() is unsafe. When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(), in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments. Therefore it kept __this_cpu_inc(), with preempt_disable/enable in the caller. Tree SRCU however only does one increment, so on most architectures it is more efficient for __srcu_read_lock() to use this_cpu_inc(), and any performance differences appear to be down in the noise. Cc: stable@vger.kernel.org Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING") Reported-by: Linu Cherian Suggested-by: Linu Cherian Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Cc: Linus Torvalds Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 2 -- kernel/rcu/srcu.c | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 167ad8831aaf..4c1d5f7e62c4 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -172,9 +172,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { int retval; - preempt_disable(); retval = __srcu_read_lock(sp); - preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 584d8a983883..dea03614263f 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - __this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); + this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { -- cgit v1.2.3 From f67abed585efe251edda52dc9690020d6441890f Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Fri, 9 Jun 2017 10:00:29 +0200 Subject: sched/fair: Fix typo in printk message 'schedstats' kernel parameter should be set to enable/disable, so correct the printk hint saying that it should be set to 'enable' rather than 'enabled' to enable scheduler tracepoints. Signed-off-by: Marcin Nowakowski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1496995229-31245-1-git-send-email-marcin.nowakowski@imgtec.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d71109321841..c77e4b1d51c0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3563,7 +3563,7 @@ static inline void check_schedstat_required(void) trace_sched_stat_runtime_enabled()) { printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " "stat_blocked and stat_runtime require the " - "kernel parameter schedstats=enabled or " + "kernel parameter schedstats=enable or " "kernel.sched_schedstats=1\n"); } #endif -- cgit v1.2.3 From 252d2a4117bc181b287eeddf848863788da733ae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Jun 2017 11:49:15 -0700 Subject: sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off() idle_task_exit() can be called with IRQs on x86 on and therefore should use switch_mm(), not switch_mm_irqs_off(). This doesn't seem to cause any problems right now, but it will confuse my upcoming TLB flush changes. Nonetheless, I think it should be backported because it's trivial. There won't be any meaningful performance impact because idle_task_exit() is only used when offlining a CPU. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler") Link: http://lkml.kernel.org/r/ca3d1a9fa93a0b49f5a8ff729eda3640fb6abdf9.1497034141.git.luto@kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 803c3bc274c4..326d4f88e2b1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5605,7 +5605,7 @@ void idle_task_exit(void) BUG_ON(cpu_online(smp_processor_id())); if (mm != &init_mm) { - switch_mm_irqs_off(mm, &init_mm, current); + switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } mmdrop(mm); -- cgit v1.2.3 From ff0a6d6f932ff4b9eb8e8140f98cc1cf763d0d78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Jun 2017 14:16:16 +0200 Subject: Revert "cpufreq: schedutil: Reduce frequencies slower" Revert commit 39b64aa1c007 (cpufreq: schedutil: Reduce frequencies slower) that introduced unintentional changes in behavior leading to adverse effects on some systems. Reported-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 76877a62b5fa..8773d1efdfab 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (sg_policy->next_freq == next_freq) return; - if (sg_policy->next_freq > next_freq) - next_freq = (sg_policy->next_freq + next_freq) >> 1; - sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; -- cgit v1.2.3 From 94114c367553f3301747e47f6947cabde947575f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jun 2017 23:36:03 -0700 Subject: tick/broadcast: Make tick_broadcast_setup_oneshot() static This function isn't used outside of tick-broadcast.c, so let's mark it static. Signed-off-by: Stephen Boyd Link: http://lkml.kernel.org/r/20170608063603.13276-1-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 4 +++- kernel/time/tick-internal.h | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 987e496bb51a..b398c2ea69b2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,9 +37,11 @@ static int tick_broadcast_forced; static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); #ifdef CONFIG_TICK_ONESHOT +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif @@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, /** * tick_broadcast_setup_oneshot - setup the broadcast device */ -void tick_broadcast_setup_oneshot(struct clock_event_device *bc) +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f738251000fe..be0ac01f2e12 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); @@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } -- cgit v1.2.3 From fa07ab72cbb0d843429e61bf179308aed6cbe0dd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 11 Jun 2017 00:38:36 +0200 Subject: genirq: Release resources in __setup_irq() error path In case __irq_set_trigger() fails the resources requested via irq_request_resources() are not released. Add the missing release call into the error handling path. Fixes: c1bacbae8192 ("genirq: Provide irq_request/release_resources chip callbacks") Signed-off-by: Heiner Kallweit Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/655538f5-cb20-a892-ff15-fbd2dd1fa4ec@gmail.com --- kernel/irq/manage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 070be980c37a..425170d4439b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK); - if (ret) + if (ret) { + irq_release_resources(desc); goto out_mask; + } } desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ -- cgit v1.2.3 From ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:06:13 +0200 Subject: sched/wait: Rename wait_queue_t => wait_queue_entry_t Rename: wait_queue_t => wait_queue_entry_t 'wait_queue_t' was always a slight misnomer: its name implies that it's a "queue", but in reality it's a queue *entry*. The 'real' queue is the wait queue head, which had to carry the name. Start sorting this out by renaming it to 'wait_queue_entry_t'. This also allows the real structure name 'struct __wait_queue' to lose its double underscore and become 'struct wait_queue_entry', which is the more canonical nomenclature for such data types. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-hacking.tmpl | 2 +- Documentation/filesystems/autofs4.txt | 12 ++-- block/blk-mq.c | 2 +- block/blk-wbt.c | 2 +- block/kyber-iosched.c | 8 +-- drivers/bluetooth/btmrvl_main.c | 2 +- drivers/char/ipmi/ipmi_watchdog.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 +- drivers/gpu/drm/i915/i915_sw_fence.c | 14 ++--- drivers/gpu/drm/i915/i915_sw_fence.h | 2 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/vga/vgaarb.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_main.c | 2 +- drivers/md/bcache/btree.h | 2 +- drivers/net/ethernet/cavium/liquidio/octeon_main.h | 4 +- drivers/net/wireless/cisco/airo.c | 2 +- .../net/wireless/intersil/hostap/hostap_ioctl.c | 2 +- drivers/net/wireless/marvell/libertas/main.c | 2 +- drivers/scsi/dpt/dpti_i2o.h | 2 +- drivers/scsi/ips.c | 12 ++-- drivers/scsi/ips.h | 4 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 6 +- .../staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 4 +- drivers/staging/lustre/lnet/libcfs/debug.c | 2 +- drivers/staging/lustre/lnet/libcfs/tracefile.c | 2 +- drivers/staging/lustre/lnet/lnet/lib-eq.c | 2 +- drivers/staging/lustre/lnet/lnet/lib-socket.c | 2 +- drivers/staging/lustre/lustre/fid/fid_request.c | 6 +- drivers/staging/lustre/lustre/include/lustre_lib.h | 4 +- drivers/staging/lustre/lustre/llite/lcommon_cl.c | 2 +- .../staging/lustre/lustre/lov/lov_cl_internal.h | 2 +- drivers/staging/lustre/lustre/lov/lov_object.c | 2 +- drivers/staging/lustre/lustre/obdclass/lu_object.c | 6 +- drivers/tty/synclink_gt.c | 2 +- drivers/vfio/virqfd.c | 2 +- drivers/vhost/vhost.c | 2 +- drivers/vhost/vhost.h | 2 +- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/waitq.c | 18 +++--- fs/cachefiles/internal.h | 2 +- fs/cachefiles/namei.c | 2 +- fs/cachefiles/rdwr.c | 2 +- fs/dax.c | 4 +- fs/eventfd.c | 2 +- fs/eventpoll.c | 10 ++-- fs/fs_pin.c | 2 +- fs/nfs/nfs4proc.c | 4 +- fs/nilfs2/segment.c | 2 +- fs/orangefs/orangefs-bufmap.c | 4 +- fs/reiserfs/journal.c | 2 +- fs/select.c | 4 +- fs/signalfd.c | 2 +- fs/userfaultfd.c | 8 +-- include/linux/blk-mq.h | 2 +- include/linux/eventfd.h | 4 +- include/linux/kvm_irqfd.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/poll.h | 2 +- include/linux/vfio.h | 2 +- include/linux/wait.h | 67 +++++++++++----------- include/net/af_unix.h | 2 +- include/uapi/linux/auto_fs.h | 4 +- include/uapi/linux/auto_fs4.h | 4 +- kernel/exit.c | 4 +- kernel/futex.c | 2 +- kernel/sched/completion.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/wait.c | 42 +++++++------- kernel/workqueue.c | 4 +- mm/filemap.c | 10 ++-- mm/memcontrol.c | 8 +-- mm/mempool.c | 2 +- mm/shmem.c | 2 +- net/9p/trans_fd.c | 4 +- net/bluetooth/bnep/core.c | 2 +- net/bluetooth/cmtp/core.c | 2 +- net/bluetooth/hidp/core.c | 2 +- net/core/datagram.c | 2 +- net/unix/af_unix.c | 4 +- sound/core/control.c | 2 +- sound/core/hwdep.c | 2 +- sound/core/init.c | 2 +- sound/core/oss/pcm_oss.c | 4 +- sound/core/pcm_lib.c | 2 +- sound/core/pcm_native.c | 4 +- sound/core/rawmidi.c | 8 +-- sound/core/seq/seq_fifo.c | 2 +- sound/core/seq/seq_memory.c | 2 +- sound/core/timer.c | 2 +- sound/isa/wavefront/wavefront_synth.c | 2 +- sound/pci/mixart/mixart_core.c | 4 +- sound/pci/ymfpci/ymfpci_main.c | 2 +- virt/kvm/eventfd.c | 2 +- 94 files changed, 216 insertions(+), 213 deletions(-) (limited to 'kernel') diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index da5c087462b1..c3c705591532 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl @@ -819,7 +819,7 @@ printk(KERN_INFO "my ip: %pI4\n", &ipaddress); certain condition is true. They must be used carefully to ensure there is no race condition. You declare a wait_queue_head_t, and then processes which want to - wait for that condition declare a wait_queue_t + wait for that condition declare a wait_queue_entry_t referring to themselves, and place that in the queue. diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt index f10dd590f69f..8444dc3d57e8 100644 --- a/Documentation/filesystems/autofs4.txt +++ b/Documentation/filesystems/autofs4.txt @@ -316,7 +316,7 @@ For version 5, the format of the message is: struct autofs_v5_packet { int proto_version; /* Protocol version */ int type; /* Type of packet */ - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; __u32 dev; __u64 ino; __u32 uid; @@ -341,12 +341,12 @@ The pipe will be set to "packet mode" (equivalent to passing `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at most one packet, and any unread portion of a packet will be discarded. -The `wait_queue_token` is a unique number which can identify a +The `wait_queue_entry_token` is a unique number which can identify a particular request to be acknowledged. When a message is sent over the pipe the affected dentry is marked as either "active" or "expiring" and other accesses to it block until the message is acknowledged using one of the ioctls below and the relevant -`wait_queue_token`. +`wait_queue_entry_token`. Communicating with autofs: root directory ioctls ------------------------------------------------ @@ -358,7 +358,7 @@ capability, or must be the automount daemon. The available ioctl commands are: - **AUTOFS_IOC_READY**: a notification has been handled. The argument - to the ioctl command is the "wait_queue_token" number + to the ioctl command is the "wait_queue_entry_token" number corresponding to the notification being acknowledged. - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with the error code `ENOENT`. @@ -382,14 +382,14 @@ The available ioctl commands are: struct autofs_packet_expire_multi { int proto_version; /* Protocol version */ int type; /* Type of packet */ - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; int len; char name[NAME_MAX+1]; }; is required. This is filled in with the name of something that can be unmounted or removed. If nothing can be expired, - `errno` is set to `EAGAIN`. Even though a `wait_queue_token` + `errno` is set to `EAGAIN`. Even though a `wait_queue_entry_token` is present in the structure, no "wait queue" is established and no acknowledgment is needed. - **AUTOFS_IOC_EXPIRE_MULTI**: This is similar to diff --git a/block/blk-mq.c b/block/blk-mq.c index bb66c96850b1..a083f95e04b1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -926,7 +926,7 @@ static bool reorder_tags_to_front(struct list_head *list) return first != NULL; } -static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags, +static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 17676f4d7fd1..5f3a37c2784c 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -503,7 +503,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) } static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, - wait_queue_t *wait, unsigned long rw) + wait_queue_entry_t *wait, unsigned long rw) { /* * inc it here even if disabled, since we'll dec it at completion. diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b9faabc75fdb..b95d6bd714c0 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -99,7 +99,7 @@ struct kyber_hctx_data { struct list_head rqs[KYBER_NUM_DOMAINS]; unsigned int cur_domain; unsigned int batching; - wait_queue_t domain_wait[KYBER_NUM_DOMAINS]; + wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS]; }; @@ -507,7 +507,7 @@ static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd, } } -static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags, +static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private); @@ -523,7 +523,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, { unsigned int sched_domain = khd->cur_domain; struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain]; - wait_queue_t *wait = &khd->domain_wait[sched_domain]; + wait_queue_entry_t *wait = &khd->domain_wait[sched_domain]; struct sbq_wait_state *ws; int nr; @@ -734,7 +734,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ { \ struct blk_mq_hw_ctx *hctx = data; \ struct kyber_hctx_data *khd = hctx->sched_data; \ - wait_queue_t *wait = &khd->domain_wait[domain]; \ + wait_queue_entry_t *wait = &khd->domain_wait[domain]; \ \ seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \ return 0; \ diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index c38cb5b91291..fe850f0567cb 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -602,7 +602,7 @@ static int btmrvl_service_main_thread(void *data) struct btmrvl_thread *thread = data; struct btmrvl_private *priv = thread->priv; struct btmrvl_adapter *adapter = priv->adapter; - wait_queue_t wait; + wait_queue_entry_t wait; struct sk_buff *skb; ulong flags; diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index d165af8abe36..a5c6cfe71a8e 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -821,7 +821,7 @@ static ssize_t ipmi_read(struct file *file, loff_t *ppos) { int rv = 0; - wait_queue_t wait; + wait_queue_entry_t wait; if (count <= 0) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 129c58bb4805..a4a920c4c454 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -123,7 +123,7 @@ struct drm_i915_gem_request { * It is used by the driver to then queue the request for execution. */ struct i915_sw_fence submit; - wait_queue_t submitq; + wait_queue_entry_t submitq; wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index a277f8eb7beb..8669bfa33064 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -152,7 +152,7 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, struct list_head *continuation) { wait_queue_head_t *x = &fence->wait; - wait_queue_t *pos, *next; + wait_queue_entry_t *pos, *next; unsigned long flags; debug_fence_deactivate(fence); @@ -254,7 +254,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence) __i915_sw_fence_commit(fence); } -static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) +static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key) { list_del(&wq->task_list); __i915_sw_fence_complete(wq->private, key); @@ -267,7 +267,7 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void * static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, const struct i915_sw_fence * const signaler) { - wait_queue_t *wq; + wait_queue_entry_t *wq; if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags)) return false; @@ -288,7 +288,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence) { - wait_queue_t *wq; + wait_queue_entry_t *wq; if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags)) return; @@ -320,7 +320,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *signaler, - wait_queue_t *wq, gfp_t gfp) + wait_queue_entry_t *wq, gfp_t gfp) { unsigned long flags; int pending; @@ -359,7 +359,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, spin_lock_irqsave(&signaler->wait.lock, flags); if (likely(!i915_sw_fence_done(signaler))) { - __add_wait_queue_tail(&signaler->wait, wq); + __add_wait_queue_entry_tail(&signaler->wait, wq); pending = 1; } else { i915_sw_fence_wake(wq, 0, 0, NULL); @@ -372,7 +372,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *signaler, - wait_queue_t *wq) + wait_queue_entry_t *wq) { return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0); } diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index d31cefbbcc04..fd3c3bf6c8b7 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -66,7 +66,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence); int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *after, - wait_queue_t *wq); + wait_queue_entry_t *wq); int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence, struct i915_sw_fence *after, gfp_t gfp); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c1c8e2208a21..e562a78510ff 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -375,7 +375,7 @@ struct radeon_fence { unsigned ring; bool is_vm_update; - wait_queue_t fence_wake; + wait_queue_entry_t fence_wake; }; int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ef09f0a63754..e86f2bd38410 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -158,7 +158,7 @@ int radeon_fence_emit(struct radeon_device *rdev, * for the fence locking itself, so unlocked variants are used for * fence_signal, and remove_wait_queue. */ -static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) +static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { struct radeon_fence *fence; u64 seq; diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 92f1452dad57..76875f6299b8 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -417,7 +417,7 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { struct vga_device *vgadev, *conflict; unsigned long flags; - wait_queue_t wait; + wait_queue_entry_t wait; int rc = 0; vga_check_first_use(); diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index a3f18a22f5ed..e0f47cc2effc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1939,7 +1939,7 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev, bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev) { struct i40iw_device *iwdev; - wait_queue_t wait; + wait_queue_entry_t wait; iwdev = dev->back_dev; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 9b80417cd547..73da1f5626cb 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -207,7 +207,7 @@ void bkey_put(struct cache_set *c, struct bkey *k); struct btree_op { /* for waiting on btree reserve in btree_split() */ - wait_queue_t wait; + wait_queue_entry_t wait; /* Btree level at which we start taking write locks */ short lock; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index bed9ef17bc26..7ccffbb0019e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -144,7 +144,7 @@ static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { int errno = 0; - wait_queue_t we; + wait_queue_entry_t we; init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); @@ -171,7 +171,7 @@ sleep_timeout_cond(wait_queue_head_t *wait_queue, int *condition, int timeout) { - wait_queue_t we; + wait_queue_entry_t we; init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 1b7e125a28e2..6a13303af2b7 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -3066,7 +3066,7 @@ static int airo_thread(void *data) { if (ai->jobs) { locked = down_interruptible(&ai->sem); } else { - wait_queue_t wait; + wait_queue_entry_t wait; init_waitqueue_entry(&wait, current); add_wait_queue(&ai->thr_wait, &wait); diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index b2c6b065b542..ff153ce29539 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -2544,7 +2544,7 @@ static int prism2_ioctl_priv_prism2_param(struct net_device *dev, ret = -EINVAL; } if (local->iw_mode == IW_MODE_MASTER) { - wait_queue_t __wait; + wait_queue_entry_t __wait; init_waitqueue_entry(&__wait, current); add_wait_queue(&local->hostscan_wq, &__wait); set_current_state(TASK_INTERRUPTIBLE); diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index e3500203715c..dde065d0d5c1 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -453,7 +453,7 @@ static int lbs_thread(void *data) { struct net_device *dev = data; struct lbs_private *priv = dev->ml_priv; - wait_queue_t wait; + wait_queue_entry_t wait; lbs_deb_enter(LBS_DEB_THREAD); diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h index bd9e31e16249..16fc380b5512 100644 --- a/drivers/scsi/dpt/dpti_i2o.h +++ b/drivers/scsi/dpt/dpti_i2o.h @@ -48,7 +48,7 @@ #include typedef wait_queue_head_t adpt_wait_queue_head_t; #define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait) -typedef wait_queue_t adpt_wait_queue_t; +typedef wait_queue_entry_t adpt_wait_queue_entry_t; /* * message structures diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 3419e1bcdff6..67621308eb9c 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -301,13 +301,13 @@ static uint32_t ips_statupd_copperhead_memio(ips_ha_t *); static uint32_t ips_statupd_morpheus(ips_ha_t *); static ips_scb_t *ips_getscb(ips_ha_t *); static void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *); -static void ips_putq_wait_tail(ips_wait_queue_t *, struct scsi_cmnd *); +static void ips_putq_wait_tail(ips_wait_queue_entry_t *, struct scsi_cmnd *); static void ips_putq_copp_tail(ips_copp_queue_t *, ips_copp_wait_item_t *); static ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *); static ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *); -static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *); -static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *, +static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *); +static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *, struct scsi_cmnd *); static ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *, ips_copp_wait_item_t *); @@ -2871,7 +2871,7 @@ ips_removeq_scb(ips_scb_queue_t * queue, ips_scb_t * item) /* ASSUMED to be called from within the HA lock */ /* */ /****************************************************************************/ -static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item) +static void ips_putq_wait_tail(ips_wait_queue_entry_t *queue, struct scsi_cmnd *item) { METHOD_TRACE("ips_putq_wait_tail", 1); @@ -2902,7 +2902,7 @@ static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item) /* ASSUMED to be called from within the HA lock */ /* */ /****************************************************************************/ -static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue) +static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *queue) { struct scsi_cmnd *item; @@ -2936,7 +2936,7 @@ static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue) /* ASSUMED to be called from within the HA lock */ /* */ /****************************************************************************/ -static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *queue, +static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *queue, struct scsi_cmnd *item) { struct scsi_cmnd *p; diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index b782bb60baf0..366be3b2f9b4 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -989,7 +989,7 @@ typedef struct ips_wait_queue { struct scsi_cmnd *head; struct scsi_cmnd *tail; int count; -} ips_wait_queue_t; +} ips_wait_queue_entry_t; typedef struct ips_copp_wait_item { struct scsi_cmnd *scsi_cmd; @@ -1035,7 +1035,7 @@ typedef struct ips_ha { ips_stat_t sp; /* Status packer pointer */ struct ips_scb *scbs; /* Array of all CCBS */ struct ips_scb *scb_freelist; /* SCB free list */ - ips_wait_queue_t scb_waitlist; /* Pending SCB list */ + ips_wait_queue_entry_t scb_waitlist; /* Pending SCB list */ ips_copp_queue_t copp_waitlist; /* Pending PT list */ ips_scb_queue_t scb_activelist; /* Active SCB list */ IPS_IO_CMD *dummy; /* dummy command */ diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 0db662d6abdd..85b242ec5f9b 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3267,7 +3267,7 @@ int kiblnd_connd(void *arg) { spinlock_t *lock = &kiblnd_data.kib_connd_lock; - wait_queue_t wait; + wait_queue_entry_t wait; unsigned long flags; struct kib_conn *conn; int timeout; @@ -3521,7 +3521,7 @@ kiblnd_scheduler(void *arg) long id = (long)arg; struct kib_sched_info *sched; struct kib_conn *conn; - wait_queue_t wait; + wait_queue_entry_t wait; unsigned long flags; struct ib_wc wc; int did_something; @@ -3656,7 +3656,7 @@ kiblnd_failover_thread(void *arg) { rwlock_t *glock = &kiblnd_data.kib_global_lock; struct kib_dev *dev; - wait_queue_t wait; + wait_queue_entry_t wait; unsigned long flags; int rc; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index 3ed3b08c122c..6b38d5a8fe92 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -2166,7 +2166,7 @@ ksocknal_connd(void *arg) { spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock; struct ksock_connreq *cr; - wait_queue_t wait; + wait_queue_entry_t wait; int nloops = 0; int cons_retry = 0; @@ -2554,7 +2554,7 @@ ksocknal_check_peer_timeouts(int idx) int ksocknal_reaper(void *arg) { - wait_queue_t wait; + wait_queue_entry_t wait; struct ksock_conn *conn; struct ksock_sched *sched; struct list_head enomem_conns; diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index c56e9922cd5b..49deb448b044 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -361,7 +361,7 @@ static int libcfs_debug_dumplog_thread(void *arg) void libcfs_debug_dumplog(void) { - wait_queue_t wait; + wait_queue_entry_t wait; struct task_struct *dumper; /* we're being careful to ensure that the kernel thread is diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c index 9599b7441feb..27082d2f7938 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -990,7 +990,7 @@ static int tracefiled(void *arg) complete(&tctl->tctl_start); while (1) { - wait_queue_t __wait; + wait_queue_entry_t __wait; pc.pc_want_daemon_pages = 0; collect_pages(&pc); diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c index ce4b83584e17..9ebba4ef5f90 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-eq.c +++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c @@ -312,7 +312,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) { int tms = *timeout_ms; int wait; - wait_queue_t wl; + wait_queue_entry_t wl; unsigned long now; if (!tms) diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index 9fca8d225ee0..f075706bba6d 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -516,7 +516,7 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, int lnet_sock_accept(struct socket **newsockp, struct socket *sock) { - wait_queue_t wait; + wait_queue_entry_t wait; struct socket *newsock; int rc; diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c index 999f250ceed0..bf31bc200d27 100644 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ b/drivers/staging/lustre/lustre/fid/fid_request.c @@ -192,7 +192,7 @@ static int seq_client_alloc_seq(const struct lu_env *env, } static int seq_fid_alloc_prep(struct lu_client_seq *seq, - wait_queue_t *link) + wait_queue_entry_t *link) { if (seq->lcs_update) { add_wait_queue(&seq->lcs_waitq, link); @@ -223,7 +223,7 @@ static void seq_fid_alloc_fini(struct lu_client_seq *seq) int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, struct lu_fid *fid) { - wait_queue_t link; + wait_queue_entry_t link; int rc; LASSERT(seq); @@ -290,7 +290,7 @@ EXPORT_SYMBOL(seq_client_alloc_fid); */ void seq_client_flush(struct lu_client_seq *seq) { - wait_queue_t link; + wait_queue_entry_t link; LASSERT(seq); init_waitqueue_entry(&link, current); diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index b04d613846ee..f24970da8323 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -201,7 +201,7 @@ struct l_wait_info { sigmask(SIGALRM)) /** - * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * wait_queue_entry_t of Linux (version < 2.6.34) is a FIFO list for exclusively * waiting threads, which is not always desirable because all threads will * be waken up again and again, even user only needs a few of them to be * active most time. This is not good for performance because cache can @@ -228,7 +228,7 @@ struct l_wait_info { */ #define __l_wait_event(wq, condition, info, ret, l_add_wait) \ do { \ - wait_queue_t __wait; \ + wait_queue_entry_t __wait; \ long __timeout = info->lwi_timeout; \ sigset_t __blocked; \ int __allow_intr = info->lwi_allow_intr; \ diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c index 8af611033e12..96515b839436 100644 --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -207,7 +207,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md) static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) { struct lu_object_header *header = obj->co_lu.lo_header; - wait_queue_t waiter; + wait_queue_entry_t waiter; if (unlikely(atomic_read(&header->loh_ref) != 1)) { struct lu_site *site = obj->co_lu.lo_dev->ld_site; diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h index 391c632365ae..e889d3a7de9c 100644 --- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h @@ -370,7 +370,7 @@ struct lov_thread_info { struct ost_lvb lti_lvb; struct cl_2queue lti_cl2q; struct cl_page_list lti_plist; - wait_queue_t lti_waiter; + wait_queue_entry_t lti_waiter; struct cl_attr lti_attr; }; diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c index ab3ecfeeadc8..eddabbe31e5c 100644 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ b/drivers/staging/lustre/lustre/lov/lov_object.c @@ -371,7 +371,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov, struct lov_layout_raid0 *r0; struct lu_site *site; struct lu_site_bkt_data *bkt; - wait_queue_t *waiter; + wait_queue_entry_t *waiter; r0 = &lov->u.raid0; LASSERT(r0->lo_sub[idx] == los); diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index abcf951208d2..76ae600ae2c8 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -556,7 +556,7 @@ EXPORT_SYMBOL(lu_object_print); static struct lu_object *htable_lookup(struct lu_site *s, struct cfs_hash_bd *bd, const struct lu_fid *f, - wait_queue_t *waiter, + wait_queue_entry_t *waiter, __u64 *version) { struct lu_site_bkt_data *bkt; @@ -670,7 +670,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env, struct lu_device *dev, const struct lu_fid *f, const struct lu_object_conf *conf, - wait_queue_t *waiter) + wait_queue_entry_t *waiter) { struct lu_object *o; struct lu_object *shadow; @@ -750,7 +750,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env, { struct lu_site_bkt_data *bkt; struct lu_object *obj; - wait_queue_t wait; + wait_queue_entry_t wait; while (1) { obj = lu_object_find_try(env, dev, f, conf, &wait); diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 31885f20fc15..cc047de72e2a 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -184,7 +184,7 @@ static void hdlcdev_exit(struct slgt_info *info); struct cond_wait { struct cond_wait *next; wait_queue_head_t q; - wait_queue_t wait; + wait_queue_entry_t wait; unsigned int data; }; static void init_cond_wait(struct cond_wait *w, unsigned int data); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 27c89cd5d70b..4797217e5e72 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -43,7 +43,7 @@ static void virqfd_deactivate(struct virqfd *virqfd) queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); } -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct virqfd *virqfd = container_of(wait, struct virqfd, wait); unsigned long flags = (unsigned long)key; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 042030e5a035..e4613a3c362d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -165,7 +165,7 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, add_wait_queue(wqh, &poll->wait); } -static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, +static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f55671d53f28..f72095868b93 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -31,7 +31,7 @@ struct vhost_work { struct vhost_poll { poll_table table; wait_queue_head_t *wqh; - wait_queue_t wait; + wait_queue_entry_t wait; struct vhost_work work; unsigned long mask; struct vhost_dev *dev; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index beef981aa54f..974f5346458a 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -83,7 +83,7 @@ struct autofs_info { struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; /* We use the following to see what we are waiting for */ struct qstr name; u32 dev; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 24a58bf9ca72..7071895b0678 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, size_t pktsz; pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", - (unsigned long) wq->wait_queue_token, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, type); memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ @@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*mp); - mp->wait_queue_token = wq->wait_queue_token; + mp->wait_queue_entry_token = wq->wait_queue_entry_token; mp->len = wq->name.len; memcpy(mp->name, wq->name.name, wq->name.len); mp->name[wq->name.len] = '\0'; @@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*ep); - ep->wait_queue_token = wq->wait_queue_token; + ep->wait_queue_entry_token = wq->wait_queue_entry_token; ep->len = wq->name.len; memcpy(ep->name, wq->name.name, wq->name.len); ep->name[wq->name.len] = '\0'; @@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*packet); - packet->wait_queue_token = wq->wait_queue_token; + packet->wait_queue_entry_token = wq->wait_queue_entry_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); packet->name[wq->name.len] = '\0'; @@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, return -ENOMEM; } - wq->wait_queue_token = autofs4_next_wait_queue; + wq->wait_queue_entry_token = autofs4_next_wait_queue; if (++autofs4_next_wait_queue == 0) autofs4_next_wait_queue = 1; wq->next = sbi->queues; @@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, } pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->name.len, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, notify); /* @@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, } else { wq->wait_ctr++; pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->name.len, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, notify); mutex_unlock(&sbi->wq_mutex); kfree(qstr.name); @@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, } -int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status) +int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status) { struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) { - if (wq->wait_queue_token == wait_queue_token) + if (wq->wait_queue_entry_token == wait_queue_entry_token) break; } diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 9bf90bcc56ac..54a4fcd679ed 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -97,7 +97,7 @@ struct cachefiles_cache { * backing file read tracking */ struct cachefiles_one_read { - wait_queue_t monitor; /* link into monitored waitqueue */ + wait_queue_entry_t monitor; /* link into monitored waitqueue */ struct page *back_page; /* backing file page we're waiting for */ struct page *netfs_page; /* netfs page we're going to fill */ struct fscache_retrieval *op; /* retrieval op covering this */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 41df8a27d7eb..3978b324cbca 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -204,7 +204,7 @@ wait_for_old_object: wait_queue_head_t *wq; signed long timeout = 60 * HZ; - wait_queue_t wait; + wait_queue_entry_t wait; bool requeue; /* if the object we're waiting for is queued for processing, diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index afbdc418966d..8be33b33b981 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -21,7 +21,7 @@ * - we use this to detect read completion of backing pages * - the caller holds the waitqueue lock */ -static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, +static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, int sync, void *_key) { struct cachefiles_one_read *monitor = diff --git a/fs/dax.c b/fs/dax.c index 2a6889b3585f..323ea481d4a8 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -84,7 +84,7 @@ struct exceptional_entry_key { }; struct wait_exceptional_entry_queue { - wait_queue_t wait; + wait_queue_entry_t wait; struct exceptional_entry_key key; }; @@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, return wait_table + hash; } -static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode, +static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode, int sync, void *keyp) { struct exceptional_entry_key *key = keyp; diff --git a/fs/eventfd.c b/fs/eventfd.c index 68b9fffcb2c8..9736df2ce89d 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) * This is used to atomically remove a wait queue entry from the eventfd wait * queue head, and read/reset the counter value. */ -int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt) { unsigned long flags; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5420767c9b68..5ac1cba5ef72 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -244,7 +244,7 @@ struct eppoll_entry { * Wait queue item that will be linked to the target file wait * queue head. */ - wait_queue_t wait; + wait_queue_entry_t wait; /* The wait queue head that linked the "wait" wait queue item */ wait_queue_head_t *whead; @@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } -static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait); } /* Get the "struct epitem" from a wait queue pointer */ -static inline struct epitem *ep_item_from_wait(wait_queue_t *p) +static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } @@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) * mechanism. It is called by the stored file descriptors when they * have events to report. */ -static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; unsigned long flags; @@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int res = 0, eavail, timed_out = 0; unsigned long flags; u64 slack = 0; - wait_queue_t wait; + wait_queue_entry_t wait; ktime_t expires, *to = NULL; if (timeout > 0) { diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 611b5408f6ec..7b447a245760 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) void pin_kill(struct fs_pin *p) { - wait_queue_t wait; + wait_queue_entry_t wait; if (!p) { rcu_read_unlock(); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c08c46a3b8cd..be5a8f84e5bb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6372,7 +6372,7 @@ struct nfs4_lock_waiter { }; static int -nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key) +nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) { int ret; struct cb_notify_lock_args *cbnl = key; @@ -6415,7 +6415,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) .inode = state->inode, .owner = &owner, .notified = false }; - wait_queue_t wait; + wait_queue_entry_t wait; /* Don't bother with waitqueue if we don't expect a callback */ if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags)) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index febed1217b3f..775304e7f96f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino) } struct nilfs_segctor_wait_request { - wait_queue_t wq; + wait_queue_entry_t wq; __u32 seq; int err; atomic_t done; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 83b506020718..9e37b7028ea4 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -47,7 +47,7 @@ static void run_down(struct slot_map *m) if (m->c != -1) { for (;;) { if (likely(list_empty(&wait.task_list))) - __add_wait_queue_tail(&m->q, &wait); + __add_wait_queue_entry_tail(&m->q, &wait); set_current_state(TASK_UNINTERRUPTIBLE); if (m->c == -1) @@ -85,7 +85,7 @@ static int wait_for_free(struct slot_map *m) do { long n = left, t; if (likely(list_empty(&wait.task_list))) - __add_wait_queue_tail_exclusive(&m->q, &wait); + __add_wait_queue_entry_tail_exclusive(&m->q, &wait); set_current_state(TASK_INTERRUPTIBLE); if (m->c > 0) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 39bb1e838d8d..a11d773e5ff3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s) static void queue_log_writer(struct super_block *s) { - wait_queue_t wait; + wait_queue_entry_t wait; struct reiserfs_journal *journal = SB_JOURNAL(s); set_bit(J_WRITERS_QUEUED, &journal->j_state); diff --git a/fs/select.c b/fs/select.c index d6c652a31e99..5b524a977d91 100644 --- a/fs/select.c +++ b/fs/select.c @@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) return table->entry++; } -static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_wqueues *pwq = wait->private; DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); @@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) return default_wake_function(&dummy_wait, mode, sync, key); } -static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_table_entry *entry; diff --git a/fs/signalfd.c b/fs/signalfd.c index 7e3d71109f51..593b022ac11b 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) if (likely(!waitqueue_active(wqh))) return; - /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ wake_up_poll(wqh, POLLHUP | POLLFREE); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1d622f276e3a..bda64fcd8a0c 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx { struct userfaultfd_wait_queue { struct uffd_msg msg; - wait_queue_t wq; + wait_queue_entry_t wq; struct userfaultfd_ctx *ctx; bool waken; }; @@ -91,7 +91,7 @@ struct userfaultfd_wake_range { unsigned long len; }; -static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, +static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { struct userfaultfd_wake_range *range = key; @@ -860,7 +860,7 @@ wakeup: static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_head_t *wqh) { - wait_queue_t *wq; + wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; VM_BUG_ON(!spin_is_locked(&wqh->lock)); @@ -1747,7 +1747,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; - wait_queue_t *wq; + wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; unsigned long pending = 0, total = 0; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fcd641032f8d..95ba83806c5d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,7 +33,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; - wait_queue_t dispatch_wait; + wait_queue_entry_t dispatch_wait; atomic_t wait_index; struct blk_mq_tags *tags; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index ff0b981f078e..9e4befd95bc7 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -37,7 +37,7 @@ struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); -int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -73,7 +73,7 @@ static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, } static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, - wait_queue_t *wait, __u64 *cnt) + wait_queue_entry_t *wait, __u64 *cnt) { return -ENOSYS; } diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 0c1de05098c8..76c2fbc59f35 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -46,7 +46,7 @@ struct kvm_kernel_irqfd_resampler { struct kvm_kernel_irqfd { /* Used for MSI fast-path */ struct kvm *kvm; - wait_queue_t wait; + wait_queue_entry_t wait; /* Update side is protected by irqfds.lock */ struct kvm_kernel_irq_routing_entry irq_entry; seqcount_t irq_entry_sc; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 316a19f6b635..e7bbd9d4dc6c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -524,7 +524,7 @@ void page_endio(struct page *page, bool is_write, int err); /* * Add an arbitrary waiter to a page's wait queue */ -extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); +extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. diff --git a/include/linux/poll.h b/include/linux/poll.h index 75ffc5729e4c..2889f09a1c60 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -75,7 +75,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) struct poll_table_entry { struct file *filp; unsigned long key; - wait_queue_t wait; + wait_queue_entry_t wait; wait_queue_head_t *wait_address; }; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index edf9b2cad277..f57076b958b7 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -183,7 +183,7 @@ struct virqfd { void (*thread)(void *, void *); void *data; struct work_struct inject; - wait_queue_t wait; + wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; struct virqfd **pvirqfd; diff --git a/include/linux/wait.h b/include/linux/wait.h index db076ca7f11d..5889f0c86ff7 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,15 +10,18 @@ #include #include -typedef struct __wait_queue wait_queue_t; -typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); -int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); +typedef struct wait_queue_entry wait_queue_entry_t; +typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); +int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -/* __wait_queue::flags */ +/* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 -struct __wait_queue { +/* + * A single wait-queue entry structure: + */ +struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; @@ -34,7 +37,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - wait_queue_t wait; + wait_queue_entry_t wait; }; struct __wait_queue_head { @@ -55,7 +58,7 @@ struct task_struct; .task_list = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ - wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) + wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ @@ -88,7 +91,7 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif -static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) +static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p) { q->flags = 0; q->private = p; @@ -96,7 +99,7 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) } static inline void -init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) +init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func) { q->flags = 0; q->private = NULL; @@ -159,11 +162,11 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) return waitqueue_active(wq); } -extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); -extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); +extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); -static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) +static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new) { list_add(&new->task_list, &head->task_list); } @@ -172,27 +175,27 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(q, wait); } -static inline void __add_wait_queue_tail(wait_queue_head_t *head, - wait_queue_t *new) +static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, + wait_queue_entry_t *new) { list_add_tail(&new->task_list, &head->task_list); } static inline void -__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_tail(q, wait); + __add_wait_queue_entry_tail(q, wait); } static inline void -__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) +__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old) { list_del(&old->task_list); } @@ -249,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ -extern void init_wait_entry(wait_queue_t *__wait, int flags); +extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); /* * The below macro ___wait_event() has an explicit shadow of the __ret @@ -266,7 +269,7 @@ extern void init_wait_entry(wait_queue_t *__wait, int flags); #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ - wait_queue_t __wait; \ + wait_queue_entry_t __wait; \ long __ret = ret; /* explicit shadow */ \ \ init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ @@ -620,8 +623,8 @@ do { \ __ret; \ }) -extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); -extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); #define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ @@ -967,17 +970,17 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); -void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); -int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait); +long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout); +int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ - wait_queue_t name = { \ + wait_queue_entry_t name = { \ .private = current, \ .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd60eccb59a6..75e612a45824 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,7 +62,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; - wait_queue_t peer_wake; + wait_queue_entry_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index aa63451ef20a..1953f8d6063b 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -26,7 +26,7 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed * back to the kernel via ioctl from userspace. On architectures where 32- and * 64-bit userspace binaries can be executed it's important that the size of * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we @@ -49,7 +49,7 @@ struct autofs_packet_hdr { struct autofs_packet_missing { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; int len; char name[NAME_MAX+1]; }; diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 7c6da423d54e..65b72d0222e7 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -108,7 +108,7 @@ enum autofs_notify { /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; int len; char name[NAME_MAX+1]; }; @@ -123,7 +123,7 @@ union autofs_packet_union { /* autofs v5 common packet struct */ struct autofs_v5_packet { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; __u32 dev; __u64 ino; __u32 uid; diff --git a/kernel/exit.c b/kernel/exit.c index 516acdb0e0ec..7d694437ab44 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1004,7 +1004,7 @@ struct wait_opts { int __user *wo_stat; struct rusage __user *wo_rusage; - wait_queue_t child_wait; + wait_queue_entry_t child_wait; int notask_error; }; @@ -1541,7 +1541,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) return 0; } -static int child_wait_callback(wait_queue_t *wait, unsigned mode, +static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct wait_opts *wo = container_of(wait, struct wait_opts, diff --git a/kernel/futex.c b/kernel/futex.c index 357348a6cf6b..d6cf71d08f21 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -225,7 +225,7 @@ struct futex_pi_state { * @requeue_pi_key: the requeue_pi target futex key * @bitset: bitset for the optional bitmasked wakeup * - * We use this hashed waitqueue, instead of a normal wait_queue_t, so + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so * we can wake only the relevant ones (hashed queues may be shared). * * A futex_q has a woken state, just like tasks have TASK_RUNNING. diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 53f9558fa925..13fc5ae9bf2f 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x, if (!x->done) { DECLARE_WAITQUEUE(wait, current); - __add_wait_queue_tail_exclusive(&x->wait, &wait); + __add_wait_queue_entry_tail_exclusive(&x->wait, &wait); do { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 326d4f88e2b1..5b36644536ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3687,7 +3687,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) exception_exit(prev_state); } -int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, +int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, void *key) { return try_to_wake_up(curr->private, mode, wake_flags); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index b8c84c6dee64..301ea02dede0 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -21,7 +21,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c EXPORT_SYMBOL(__init_waitqueue_head); -void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait) { unsigned long flags; @@ -32,18 +32,18 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(add_wait_queue); -void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) { unsigned long flags; wait->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); - __add_wait_queue_tail(q, wait); + __add_wait_queue_entry_tail(q, wait); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(add_wait_queue_exclusive); -void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait) { unsigned long flags; @@ -66,7 +66,7 @@ EXPORT_SYMBOL(remove_wait_queue); static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int wake_flags, void *key) { - wait_queue_t *curr, *next; + wait_queue_entry_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) { unsigned flags = curr->flags; @@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ * loads to move into the critical region). */ void -prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) +prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) { unsigned long flags; @@ -184,20 +184,20 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) EXPORT_SYMBOL(prepare_to_wait); void -prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) +prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) { unsigned long flags; wait->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); if (list_empty(&wait->task_list)) - __add_wait_queue_tail(q, wait); + __add_wait_queue_entry_tail(q, wait); set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(prepare_to_wait_exclusive); -void init_wait_entry(wait_queue_t *wait, int flags) +void init_wait_entry(wait_queue_entry_t *wait, int flags) { wait->flags = flags; wait->private = current; @@ -206,7 +206,7 @@ void init_wait_entry(wait_queue_t *wait, int flags) } EXPORT_SYMBOL(init_wait_entry); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) +long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) { unsigned long flags; long ret = 0; @@ -230,7 +230,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } else { if (list_empty(&wait->task_list)) { if (wait->flags & WQ_FLAG_EXCLUSIVE) - __add_wait_queue_tail(q, wait); + __add_wait_queue_entry_tail(q, wait); else __add_wait_queue(q, wait); } @@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event); * condition in the caller before they add the wait * entry to the wake queue. */ -int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait) { if (likely(list_empty(&wait->task_list))) - __add_wait_queue_tail(wq, wait); + __add_wait_queue_entry_tail(wq, wait); set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) @@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) } EXPORT_SYMBOL(do_wait_intr); -int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait) { if (likely(list_empty(&wait->task_list))) - __add_wait_queue_tail(wq, wait); + __add_wait_queue_entry_tail(wq, wait); set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) @@ -290,7 +290,7 @@ EXPORT_SYMBOL(do_wait_intr_irq); * the wait descriptor from the given waitqueue if still * queued. */ -void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) +void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait) { unsigned long flags; @@ -316,7 +316,7 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(finish_wait); -int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); @@ -351,7 +351,7 @@ static inline bool is_kthread_should_stop(void) * remove_wait_queue(&wq, &wait); * */ -long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) +long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout) { set_current_state(mode); /* A */ /* @@ -375,7 +375,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) } EXPORT_SYMBOL(wait_woken); -int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { /* * Although this function is called under waitqueue lock, LOCK @@ -391,7 +391,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) } EXPORT_SYMBOL(woken_wake_function); -int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) +int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { struct wait_bit_key *key = arg; struct wait_bit_queue *wait_bit @@ -534,7 +534,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) return bit_waitqueue(p, 0); } -static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync, +static int wake_atomic_t_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { struct wait_bit_key *key = arg; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c74bf39ef764..a86688fabc55 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work) EXPORT_SYMBOL_GPL(flush_work); struct cwt_wait { - wait_queue_t wait; + wait_queue_entry_t wait; struct work_struct *work; }; -static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); diff --git a/mm/filemap.c b/mm/filemap.c index 6f1be573a5e6..80c19ee81e95 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -768,10 +768,10 @@ struct wait_page_key { struct wait_page_queue { struct page *page; int bit_nr; - wait_queue_t wait; + wait_queue_entry_t wait; }; -static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) +static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { struct wait_page_key *key = arg; struct wait_page_queue *wait_page @@ -834,7 +834,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, struct page *page, int bit_nr, int state, bool lock) { struct wait_page_queue wait_page; - wait_queue_t *wait = &wait_page.wait; + wait_queue_entry_t *wait = &wait_page.wait; int ret = 0; init_wait(wait); @@ -847,7 +847,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, if (likely(list_empty(&wait->task_list))) { if (lock) - __add_wait_queue_tail_exclusive(q, wait); + __add_wait_queue_entry_tail_exclusive(q, wait); else __add_wait_queue(q, wait); SetPageWaiters(page); @@ -907,7 +907,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) * * Add an arbitrary @waiter to the wait queue for the nominated @page. */ -void add_page_wait_queue(struct page *page, wait_queue_t *waiter) +void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) { wait_queue_head_t *q = page_waitqueue(page); unsigned long flags; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 94172089f52f..9a90b096dc6b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -170,7 +170,7 @@ struct mem_cgroup_event { */ poll_table pt; wait_queue_head_t *wqh; - wait_queue_t wait; + wait_queue_entry_t wait; struct work_struct remove; }; @@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); struct oom_wait_info { struct mem_cgroup *memcg; - wait_queue_t wait; + wait_queue_entry_t wait; }; -static int memcg_oom_wake_function(wait_queue_t *wait, +static int memcg_oom_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg; @@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work) * * Called with wqh->lock held and interrupts disabled. */ -static int memcg_event_wake(wait_queue_t *wait, unsigned mode, +static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct mem_cgroup_event *event = diff --git a/mm/mempool.c b/mm/mempool.c index 47a659dedd44..1c0294858527 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -312,7 +312,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; - wait_queue_t wait; + wait_queue_entry_t wait; gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); diff --git a/mm/shmem.c b/mm/shmem.c index e67d6ba4e98e..a6c7dece4660 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1902,7 +1902,7 @@ unlock: * entry unconditionally - even if something else had already woken the * target. */ -static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); list_del_init(&wait->task_list); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 7bc2208b6cc4..dca3cdd1a014 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -95,7 +95,7 @@ enum { struct p9_poll_wait { struct p9_conn *conn; - wait_queue_t wait; + wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; @@ -522,7 +522,7 @@ error: clear_bit(Wworksched, &m->wsched); } -static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key) +static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index fbf251fef70f..5c4808b3da2d 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -484,7 +484,7 @@ static int bnep_session(void *arg) struct net_device *dev = s->dev; struct sock *sk = s->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + wait_queue_entry_t wait; BT_DBG(""); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 9e59b6654126..14f7c8135c31 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -280,7 +280,7 @@ static int cmtp_session(void *arg) struct cmtp_session *session = arg; struct sock *sk = session->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + wait_queue_entry_t wait; BT_DBG("session %p", session); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 0bec4588c3c8..fc31161e98f2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1244,7 +1244,7 @@ static void hidp_session_run(struct hidp_session *session) static int hidp_session_thread(void *arg) { struct hidp_session *session = arg; - wait_queue_t ctrl_wait, intr_wait; + wait_queue_entry_t ctrl_wait, intr_wait; BT_DBG("session %p", session); diff --git a/net/core/datagram.c b/net/core/datagram.c index db1866f2ffcf..34678828e2bb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } -static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync, +static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { unsigned long bits = (unsigned long)key; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1a0c961f4ffe..c77ced0109b7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -343,7 +343,7 @@ found: * are still connected to it and there's no way to inform "a polling * implementation" that it should let go of a certain wait queue * - * In order to propagate a wake up, a wait_queue_t of the client + * In order to propagate a wake up, a wait_queue_entry_t of the client * socket is enqueued on the peer_wait queue of the server socket * whose wake function does a wake_up on the ordinary client socket * wait queue. This connection is established whenever a write (or @@ -352,7 +352,7 @@ found: * was relayed. */ -static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, +static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, void *key) { struct unix_sock *u; diff --git a/sound/core/control.c b/sound/core/control.c index c109b82eef4b..6362da17ac3f 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1577,7 +1577,7 @@ static ssize_t snd_ctl_read(struct file *file, char __user *buffer, struct snd_ctl_event ev; struct snd_kctl_event *kev; while (list_empty(&ctl->events)) { - wait_queue_t wait; + wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto __end_lock; diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 9602a7e38d8a..a73baa1242be 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -85,7 +85,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) int major = imajor(inode); struct snd_hwdep *hw; int err; - wait_queue_t wait; + wait_queue_entry_t wait; if (major == snd_major) { hw = snd_lookup_minor_data(iminor(inode), diff --git a/sound/core/init.c b/sound/core/init.c index 6bda8436d765..d61d2b3cd521 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -989,7 +989,7 @@ EXPORT_SYMBOL(snd_card_file_remove); */ int snd_power_wait(struct snd_card *card, unsigned int power_state) { - wait_queue_t wait; + wait_queue_entry_t wait; int result = 0; /* fastpath */ diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 36baf962f9b0..cd8b7bef8d06 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1554,7 +1554,7 @@ static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size) ssize_t result = 0; snd_pcm_state_t state; long res; - wait_queue_t wait; + wait_queue_entry_t wait; runtime = substream->runtime; init_waitqueue_entry(&wait, current); @@ -2387,7 +2387,7 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file) struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_oss_setup setup[2]; int nonblock; - wait_queue_t wait; + wait_queue_entry_t wait; err = nonseekable_open(inode, file); if (err < 0) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 5088d4b8db22..dd5254077ef7 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1904,7 +1904,7 @@ static int wait_for_avail(struct snd_pcm_substream *substream, { struct snd_pcm_runtime *runtime = substream->runtime; int is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; - wait_queue_t wait; + wait_queue_entry_t wait; int err = 0; snd_pcm_uframes_t avail = 0; long wait_time, tout; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 13dec5ec93f2..faa2e2be6f2e 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1652,7 +1652,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, struct snd_card *card; struct snd_pcm_runtime *runtime; struct snd_pcm_substream *s; - wait_queue_t wait; + wait_queue_entry_t wait; int result = 0; int nonblock = 0; @@ -2353,7 +2353,7 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file) static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) { int err; - wait_queue_t wait; + wait_queue_entry_t wait; if (pcm == NULL) { err = -ENODEV; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index ab890336175f..32588ad05653 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -368,7 +368,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) int err; struct snd_rawmidi *rmidi; struct snd_rawmidi_file *rawmidi_file = NULL; - wait_queue_t wait; + wait_queue_entry_t wait; if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) return -EINVAL; /* invalid combination */ @@ -1002,7 +1002,7 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun while (count > 0) { spin_lock_irq(&runtime->lock); while (!snd_rawmidi_ready(substream)) { - wait_queue_t wait; + wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { spin_unlock_irq(&runtime->lock); return result > 0 ? result : -EAGAIN; @@ -1306,7 +1306,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, while (count > 0) { spin_lock_irq(&runtime->lock); while (!snd_rawmidi_ready_append(substream, count)) { - wait_queue_t wait; + wait_queue_entry_t wait; if (file->f_flags & O_NONBLOCK) { spin_unlock_irq(&runtime->lock); return result > 0 ? result : -EAGAIN; @@ -1338,7 +1338,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, if (file->f_flags & O_DSYNC) { spin_lock_irq(&runtime->lock); while (runtime->avail != runtime->buffer_size) { - wait_queue_t wait; + wait_queue_entry_t wait; unsigned int last_avail = runtime->avail; init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 01c4cfe30c9f..a8c2822e0198 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -179,7 +179,7 @@ int snd_seq_fifo_cell_out(struct snd_seq_fifo *f, { struct snd_seq_event_cell *cell; unsigned long flags; - wait_queue_t wait; + wait_queue_entry_t wait; if (snd_BUG_ON(!f)) return -EINVAL; diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index d4c61ec9be13..d6e9aacdc36b 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -227,7 +227,7 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool, struct snd_seq_event_cell *cell; unsigned long flags; int err = -EAGAIN; - wait_queue_t wait; + wait_queue_entry_t wait; if (pool == NULL) return -EINVAL; diff --git a/sound/core/timer.c b/sound/core/timer.c index cd67d1c12cf1..884c3066b028 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1964,7 +1964,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, spin_lock_irq(&tu->qlock); while ((long)count - result >= unit) { while (!tu->qused) { - wait_queue_t wait; + wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 4dae9ff9ef5a..0b1e4b34b299 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1782,7 +1782,7 @@ wavefront_should_cause_interrupt (snd_wavefront_t *dev, int val, int port, unsigned long timeout) { - wait_queue_t wait; + wait_queue_entry_t wait; init_waitqueue_entry(&wait, current); spin_lock_irq(&dev->irq_lock); diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c index dccf3db48fe0..8bf2ce32d4a8 100644 --- a/sound/pci/mixart/mixart_core.c +++ b/sound/pci/mixart/mixart_core.c @@ -239,7 +239,7 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int struct mixart_msg resp; u32 msg_frame = 0; /* set to 0, so it's no notification to wait for, but the answer */ int err; - wait_queue_t wait; + wait_queue_entry_t wait; long timeout; init_waitqueue_entry(&wait, current); @@ -284,7 +284,7 @@ int snd_mixart_send_msg_wait_notif(struct mixart_mgr *mgr, struct mixart_msg *request, u32 notif_event) { int err; - wait_queue_t wait; + wait_queue_entry_t wait; long timeout; if (snd_BUG_ON(!notif_event)) diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index fe4ba463b57c..1114166c685c 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -781,7 +781,7 @@ static snd_pcm_uframes_t snd_ymfpci_capture_pointer(struct snd_pcm_substream *su static void snd_ymfpci_irq_wait(struct snd_ymfpci *chip) { - wait_queue_t wait; + wait_queue_entry_t wait; int loops = 4; while (loops-- > 0) { diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a8d540398bbd..9120edf3c94b 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -184,7 +184,7 @@ int __attribute__((weak)) kvm_arch_set_irq_inatomic( * Called with wqh->lock held and interrupts disabled */ static int -irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) +irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct kvm_kernel_irqfd *irqfd = container_of(wait, struct kvm_kernel_irqfd, wait); -- cgit v1.2.3 From 50816c48997af857d4bab3dca1aba90339705e96 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 10:33:16 +0100 Subject: sched/wait: Standardize internal naming of wait-queue entries So the various wait-queue entry variables in include/linux/wait.h and kernel/sched/wait.c are named in a colorfully inconsistent way: wait_queue_entry_t *wait wait_queue_entry_t *__wait (even in plain C code!) wait_queue_entry_t *q (!) wait_queue_entry_t *new (making anyone who knows C++ cringe) wait_queue_entry_t *old I think part of the reason for the inconsistency is the constant apparent confusion about what a wait queue 'head' versus 'entry' is. ( Some of the documentation talks about a 'wait descriptor', which is the wait-queue entry itself - further adding to the confusion. ) The most common name is 'wait', but that in itself is somewhat ambiguous as well, as it does not really make it clear whether it's a wait-queue entry or head. To improve all this name the wait-queue entry structure parameters and variables consistently and push through this naming into all the wait.h and wait.c code: struct wait_queue_entry *wq_entry The 'wq_' prefix makes it easy to grep for, and we also use the opportunity to move away from the typedef to a plain 'struct' naming: in the kernel we typically reserve typedefs for cases where a C structure is really small and somewhat opaque - such as pte_t. wait-queue entries are neither small nor opaque, so use the more standard 'struct xxx_entry' list management code nomenclature instead. ( We don't touch external users, and we preserve the typedef as well for actual wait-queue users, to reduce unnecessary churn. ) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 84 ++++++++++++++++++++++---------------------- kernel/sched/wait.c | 98 ++++++++++++++++++++++++++-------------------------- 2 files changed, 91 insertions(+), 91 deletions(-) (limited to 'kernel') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5889f0c86ff7..77fdea851d8b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -11,8 +11,9 @@ #include typedef struct wait_queue_entry wait_queue_entry_t; -typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); + +typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); +int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 @@ -37,7 +38,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - wait_queue_entry_t wait; + struct wait_queue_entry wait; }; struct __wait_queue_head { @@ -58,7 +59,7 @@ struct task_struct; .task_list = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ - wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk) + struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ @@ -91,19 +92,19 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif -static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p) +static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p) { - q->flags = 0; - q->private = p; - q->func = default_wake_function; + wq_entry->flags = 0; + wq_entry->private = p; + wq_entry->func = default_wake_function; } static inline void -init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func) +init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func) { - q->flags = 0; - q->private = NULL; - q->func = func; + wq_entry->flags = 0; + wq_entry->private = NULL; + wq_entry->func = func; } /** @@ -162,42 +163,41 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) return waitqueue_active(wq); } -extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait); -extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new) +static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_add(&new->task_list, &head->task_list); + list_add(&wq_entry->task_list, &head->task_list); } /* * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) +__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue(q, wait); + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wq_entry); } -static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, - wait_queue_entry_t *new) +static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_add_tail(&new->task_list, &head->task_list); + list_add_tail(&wq_entry->task_list, &head->task_list); } static inline void -__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) +__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_entry_tail(q, wait); + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_entry_tail(q, wq_entry); } static inline void -__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old) +__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_del(&old->task_list); + list_del(&wq_entry->task_list); } typedef int wait_bit_action_f(struct wait_bit_key *, int mode); @@ -252,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ -extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); +extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); /* * The below macro ___wait_event() has an explicit shadow of the __ret @@ -269,12 +269,12 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ - wait_queue_entry_t __wait; \ + struct wait_queue_entry __wq_entry; \ long __ret = ret; /* explicit shadow */ \ \ - init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ + init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);\ for (;;) { \ - long __int = prepare_to_wait_event(&wq, &__wait, state);\ + long __int = prepare_to_wait_event(&wq, &__wq_entry, state);\ \ if (condition) \ break; \ @@ -286,7 +286,7 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); \ cmd; \ } \ - finish_wait(&wq, &__wait); \ + finish_wait(&wq, &__wq_entry); \ __out: __ret; \ }) @@ -970,17 +970,17 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait); -long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout); -int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); -int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); -int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); +int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); +int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ - wait_queue_entry_t name = { \ + struct wait_queue_entry name = { \ .private = current, \ .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 301ea02dede0..c37b3140763e 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -21,34 +21,34 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c EXPORT_SYMBOL(__init_waitqueue_head); -void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait) +void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { unsigned long flags; - wait->flags &= ~WQ_FLAG_EXCLUSIVE; + wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, wait); + __add_wait_queue_entry_tail(q, wq_entry); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(add_wait_queue); -void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) +void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { unsigned long flags; - wait->flags |= WQ_FLAG_EXCLUSIVE; + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); - __add_wait_queue_entry_tail(q, wait); + __add_wait_queue_entry_tail(q, wq_entry); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(add_wait_queue_exclusive); -void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait) +void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { unsigned long flags; spin_lock_irqsave(&q->lock, flags); - __remove_wait_queue(q, wait); + __remove_wait_queue(q, wq_entry); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(remove_wait_queue); @@ -170,43 +170,43 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ * loads to move into the critical region). */ void -prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) +prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; - wait->flags &= ~WQ_FLAG_EXCLUSIVE; + wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); + if (list_empty(&wq_entry->task_list)) + __add_wait_queue(q, wq_entry); set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(prepare_to_wait); void -prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) +prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; - wait->flags |= WQ_FLAG_EXCLUSIVE; + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue_entry_tail(q, wait); + if (list_empty(&wq_entry->task_list)) + __add_wait_queue_entry_tail(q, wq_entry); set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(prepare_to_wait_exclusive); -void init_wait_entry(wait_queue_entry_t *wait, int flags) +void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { - wait->flags = flags; - wait->private = current; - wait->func = autoremove_wake_function; - INIT_LIST_HEAD(&wait->task_list); + wq_entry->flags = flags; + wq_entry->private = current; + wq_entry->func = autoremove_wake_function; + INIT_LIST_HEAD(&wq_entry->task_list); } EXPORT_SYMBOL(init_wait_entry); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state) +long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; long ret = 0; @@ -225,14 +225,14 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int s * can't see us, it should wake up another exclusive waiter if * we fail. */ - list_del_init(&wait->task_list); + list_del_init(&wq_entry->task_list); ret = -ERESTARTSYS; } else { - if (list_empty(&wait->task_list)) { - if (wait->flags & WQ_FLAG_EXCLUSIVE) - __add_wait_queue_entry_tail(q, wait); + if (list_empty(&wq_entry->task_list)) { + if (wq_entry->flags & WQ_FLAG_EXCLUSIVE) + __add_wait_queue_entry_tail(q, wq_entry); else - __add_wait_queue(q, wait); + __add_wait_queue(q, wq_entry); } set_current_state(state); } @@ -284,13 +284,13 @@ EXPORT_SYMBOL(do_wait_intr_irq); /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on - * @wait: wait descriptor + * @wq_entry: wait descriptor * * Sets current thread back to running state and removes * the wait descriptor from the given waitqueue if still * queued. */ -void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait) +void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait) * have _one_ other CPU that looks at or modifies * the list). */ - if (!list_empty_careful(&wait->task_list)) { + if (!list_empty_careful(&wq_entry->task_list)) { spin_lock_irqsave(&q->lock, flags); - list_del_init(&wait->task_list); + list_del_init(&wq_entry->task_list); spin_unlock_irqrestore(&q->lock, flags); } } EXPORT_SYMBOL(finish_wait); -int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) +int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key) { - int ret = default_wake_function(wait, mode, sync, key); + int ret = default_wake_function(wq_entry, mode, sync, key); if (ret) - list_del_init(&wait->task_list); + list_del_init(&wq_entry->task_list); return ret; } EXPORT_SYMBOL(autoremove_wake_function); @@ -341,17 +341,17 @@ static inline bool is_kthread_should_stop(void) * * p->state = mode; condition = true; * smp_mb(); // A smp_wmb(); // C - * if (!wait->flags & WQ_FLAG_WOKEN) wait->flags |= WQ_FLAG_WOKEN; + * if (!wq_entry->flags & WQ_FLAG_WOKEN) wq_entry->flags |= WQ_FLAG_WOKEN; * schedule() try_to_wake_up(); * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~ - * wait->flags &= ~WQ_FLAG_WOKEN; condition = true; + * wq_entry->flags &= ~WQ_FLAG_WOKEN; condition = true; * smp_mb() // B smp_wmb(); // C - * wait->flags |= WQ_FLAG_WOKEN; + * wq_entry->flags |= WQ_FLAG_WOKEN; * } * remove_wait_queue(&wq, &wait); * */ -long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout) +long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) { set_current_state(mode); /* A */ /* @@ -359,7 +359,7 @@ long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout) * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must * also observe all state before the wakeup. */ - if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) + if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) timeout = schedule_timeout(timeout); __set_current_state(TASK_RUNNING); @@ -369,13 +369,13 @@ long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout) * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss * an event. */ - smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ + smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */ return timeout; } EXPORT_SYMBOL(wait_woken); -int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) +int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key) { /* * Although this function is called under waitqueue lock, LOCK @@ -385,24 +385,24 @@ int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void * and is paired with smp_store_mb() in wait_woken(). */ smp_wmb(); /* C */ - wait->flags |= WQ_FLAG_WOKEN; + wq_entry->flags |= WQ_FLAG_WOKEN; - return default_wake_function(wait, mode, sync, key); + return default_wake_function(wq_entry, mode, sync, key); } EXPORT_SYMBOL(woken_wake_function); -int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) { struct wait_bit_key *key = arg; struct wait_bit_queue *wait_bit - = container_of(wait, struct wait_bit_queue, wait); + = container_of(wq_entry, struct wait_bit_queue, wait); if (wait_bit->key.flags != key->flags || wait_bit->key.bit_nr != key->bit_nr || test_bit(key->bit_nr, key->flags)) return 0; else - return autoremove_wake_function(wait, mode, sync, key); + return autoremove_wake_function(wq_entry, mode, sync, key); } EXPORT_SYMBOL(wake_bit_function); @@ -534,19 +534,19 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) return bit_waitqueue(p, 0); } -static int wake_atomic_t_function(wait_queue_entry_t *wait, unsigned mode, int sync, +static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) { struct wait_bit_key *key = arg; struct wait_bit_queue *wait_bit - = container_of(wait, struct wait_bit_queue, wait); + = container_of(wq_entry, struct wait_bit_queue, wait); atomic_t *val = key->flags; if (wait_bit->key.flags != key->flags || wait_bit->key.bit_nr != key->bit_nr || atomic_read(val) != 0) return 0; - return autoremove_wake_function(wait, mode, sync, key); + return autoremove_wake_function(wq_entry, mode, sync, key); } /* -- cgit v1.2.3 From 9d9d676f595b5081326be7a17dc681fcb38fb3b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:10:18 +0100 Subject: sched/wait: Standardize internal naming of wait-queue heads The wait-queue head parameters and variables are named in a couple of ways, we have the following variants currently: wait_queue_head_t *q wait_queue_head_t *wq wait_queue_head_t *head In particular the 'wq' naming is ambiguous in the sense whether it's a wait-queue head or entry name - as entries were often named 'wait'. ( Not to mention the confusion of any readers coming over from workqueue-land. ) Standardize all this around a single, unambiguous parameter and variable name: struct wait_queue_head *wq_head which is easy to grep for and also rhymes nicely with the wait-queue entry naming: struct wait_queue_entry *wq_entry Also rename: struct __wait_queue_head => struct wait_queue_head ... and use this struct type to migrate from typedefs usage to 'struct' usage, which is more in line with existing kernel practices. Don't touch any external users and preserve the main wait_queue_head_t typedef. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 76 ++++++++++++------------- kernel/sched/wait.c | 154 +++++++++++++++++++++++++-------------------------- 2 files changed, 115 insertions(+), 115 deletions(-) (limited to 'kernel') diff --git a/include/linux/wait.h b/include/linux/wait.h index 77fdea851d8b..c3d1cefc7853 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -41,11 +41,11 @@ struct wait_bit_queue { struct wait_queue_entry wait; }; -struct __wait_queue_head { +struct wait_queue_head { spinlock_t lock; struct list_head task_list; }; -typedef struct __wait_queue_head wait_queue_head_t; +typedef struct wait_queue_head wait_queue_head_t; struct task_struct; @@ -66,7 +66,7 @@ struct task_struct; .task_list = { &(name).task_list, &(name).task_list } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ - wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name) + struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } @@ -74,20 +74,20 @@ struct task_struct; #define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } -extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); +extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); -#define init_waitqueue_head(q) \ +#define init_waitqueue_head(wq_head) \ do { \ static struct lock_class_key __key; \ \ - __init_waitqueue_head((q), #q, &__key); \ + __init_waitqueue_head((wq_head), #wq_head, &__key); \ } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ ({ init_waitqueue_head(&name); name; }) # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ - wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) + struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) #else # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif @@ -109,14 +109,14 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f /** * waitqueue_active -- locklessly test for waiters on the queue - * @q: the waitqueue to test for waiters + * @wq_head: the waitqueue to test for waiters * * returns true if the wait list is not empty * * NOTE: this function is lockless and requires care, incorrect usage _will_ * lead to sporadic and non-obvious failure. * - * Use either while holding wait_queue_head_t::lock or when used for wakeups + * Use either while holding wait_queue_head::lock or when used for wakeups * with an extra smp_mb() like: * * CPU0 - waker CPU1 - waiter @@ -137,9 +137,9 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), * which (when the lock is uncontended) are of roughly equal cost. */ -static inline int waitqueue_active(wait_queue_head_t *q) +static inline int waitqueue_active(struct wait_queue_head *wq_head) { - return !list_empty(&q->task_list); + return !list_empty(&wq_head->task_list); } /** @@ -150,7 +150,7 @@ static inline int waitqueue_active(wait_queue_head_t *q) * * Please refer to the comment for waitqueue_active. */ -static inline bool wq_has_sleeper(wait_queue_head_t *wq) +static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) { /* * We need to be sure we are in sync with the @@ -160,62 +160,62 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) * waiting side. */ smp_mb(); - return waitqueue_active(wq); + return waitqueue_active(wq_head); } -extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); -static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->task_list, &head->task_list); + list_add(&wq_entry->task_list, &wq_head->task_list); } /* * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +__add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue(q, wq_entry); + __add_wait_queue(wq_head, wq_entry); } -static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add_tail(&wq_entry->task_list, &head->task_list); + list_add_tail(&wq_entry->task_list, &wq_head->task_list); } static inline void -__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +__add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_entry_tail(q, wq_entry); + __add_wait_queue_entry_tail(wq_head, wq_entry); } static inline void -__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { list_del(&wq_entry->task_list); } typedef int wait_bit_action_f(struct wait_bit_key *, int mode); -void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_bit(wait_queue_head_t *, void *, int); -int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); +void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_bit(struct wait_queue_head *, void *, int); +int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); void wake_up_bit(void *, int); void wake_up_atomic_t(atomic_t *); int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); -wait_queue_head_t *bit_waitqueue(void *, int); +struct wait_queue_head *bit_waitqueue(void *, int); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -970,10 +970,10 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index c37b3140763e..203aeea96f16 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -12,44 +12,44 @@ #include #include -void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) +void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key) { - spin_lock_init(&q->lock); - lockdep_set_class_and_name(&q->lock, key, name); - INIT_LIST_HEAD(&q->task_list); + spin_lock_init(&wq_head->lock); + lockdep_set_class_and_name(&wq_head->lock, key, name); + INIT_LIST_HEAD(&wq_head->task_list); } EXPORT_SYMBOL(__init_waitqueue_head); -void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); - __add_wait_queue_entry_tail(q, wq_entry); - spin_unlock_irqrestore(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue_entry_tail(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(add_wait_queue); -void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); - __add_wait_queue_entry_tail(q, wq_entry); - spin_unlock_irqrestore(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue_entry_tail(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(add_wait_queue_exclusive); -void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; - spin_lock_irqsave(&q->lock, flags); - __remove_wait_queue(q, wq_entry); - spin_unlock_irqrestore(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); + __remove_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(remove_wait_queue); @@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, +static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, int wake_flags, void *key) { wait_queue_entry_t *curr, *next; - list_for_each_entry_safe(curr, next, &q->task_list, task_list) { + list_for_each_entry_safe(curr, next, &wq_head->task_list, task_list) { unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) && @@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, /** * __wake_up - wake up threads blocked on a waitqueue. - * @q: the waitqueue + * @wq_head: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: is directly passed to the wakeup function @@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, * It may be assumed that this function implies a write memory barrier before * changing the task state if and only if any tasks are woken up. */ -void __wake_up(wait_queue_head_t *q, unsigned int mode, +void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, void *key) { unsigned long flags; - spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, 0, key); - spin_unlock_irqrestore(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); + __wake_up_common(wq_head, mode, nr_exclusive, 0, key); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(__wake_up); /* * Same as __wake_up but called with the spinlock in wait_queue_head_t held. */ -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) +void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr) { - __wake_up_common(q, mode, nr, 0, NULL); + __wake_up_common(wq_head, mode, nr, 0, NULL); } EXPORT_SYMBOL_GPL(__wake_up_locked); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) +void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key) { - __wake_up_common(q, mode, 1, 0, key); + __wake_up_common(wq_head, mode, 1, 0, key); } EXPORT_SYMBOL_GPL(__wake_up_locked_key); /** * __wake_up_sync_key - wake up threads blocked on a waitqueue. - * @q: the waitqueue + * @wq_head: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: opaque value to be passed to wakeup targets @@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key); * It may be assumed that this function implies a write memory barrier before * changing the task state if and only if any tasks are woken up. */ -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, +void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, void *key) { unsigned long flags; int wake_flags = 1; /* XXX WF_SYNC */ - if (unlikely(!q)) + if (unlikely(!wq_head)) return; if (unlikely(nr_exclusive != 1)) wake_flags = 0; - spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, wake_flags, key); - spin_unlock_irqrestore(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); + __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL_GPL(__wake_up_sync_key); /* * __wake_up_sync - see __wake_up_sync_key() */ -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) +void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive) { - __wake_up_sync_key(q, mode, nr_exclusive, NULL); + __wake_up_sync_key(wq_head, mode, nr_exclusive, NULL); } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ @@ -170,30 +170,30 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ * loads to move into the critical region). */ void -prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) +prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); if (list_empty(&wq_entry->task_list)) - __add_wait_queue(q, wq_entry); + __add_wait_queue(wq_head, wq_entry); set_current_state(state); - spin_unlock_irqrestore(&q->lock, flags); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(prepare_to_wait); void -prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) +prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); if (list_empty(&wq_entry->task_list)) - __add_wait_queue_entry_tail(q, wq_entry); + __add_wait_queue_entry_tail(wq_head, wq_entry); set_current_state(state); - spin_unlock_irqrestore(&q->lock, flags); + spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(prepare_to_wait_exclusive); @@ -206,12 +206,12 @@ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) } EXPORT_SYMBOL(init_wait_entry); -long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state) +long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; long ret = 0; - spin_lock_irqsave(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); if (unlikely(signal_pending_state(state, current))) { /* * Exclusive waiter must not fail if it was selected by wakeup, @@ -219,7 +219,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_ent * * The caller will recheck the condition and return success if * we were already woken up, we can not miss the event because - * wakeup locks/unlocks the same q->lock. + * wakeup locks/unlocks the same wq_head->lock. * * But we need to ensure that set-condition + wakeup after that * can't see us, it should wake up another exclusive waiter if @@ -230,13 +230,13 @@ long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_ent } else { if (list_empty(&wq_entry->task_list)) { if (wq_entry->flags & WQ_FLAG_EXCLUSIVE) - __add_wait_queue_entry_tail(q, wq_entry); + __add_wait_queue_entry_tail(wq_head, wq_entry); else - __add_wait_queue(q, wq_entry); + __add_wait_queue(wq_head, wq_entry); } set_current_state(state); } - spin_unlock_irqrestore(&q->lock, flags); + spin_unlock_irqrestore(&wq_head->lock, flags); return ret; } @@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq); /** * finish_wait - clean up after waiting in a queue - * @q: waitqueue waited on + * @wq_head: waitqueue waited on * @wq_entry: wait descriptor * * Sets current thread back to running state and removes * the wait descriptor from the given waitqueue if still * queued. */ -void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -309,9 +309,9 @@ void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) * the list). */ if (!list_empty_careful(&wq_entry->task_list)) { - spin_lock_irqsave(&q->lock, flags); + spin_lock_irqsave(&wq_head->lock, flags); list_del_init(&wq_entry->task_list); - spin_unlock_irqrestore(&q->lock, flags); + spin_unlock_irqrestore(&wq_head->lock, flags); } } EXPORT_SYMBOL(finish_wait); @@ -334,7 +334,7 @@ static inline bool is_kthread_should_stop(void) /* * DEFINE_WAIT_FUNC(wait, woken_wake_func); * - * add_wait_queue(&wq, &wait); + * add_wait_queue(&wq_head, &wait); * for (;;) { * if (condition) * break; @@ -348,7 +348,7 @@ static inline bool is_kthread_should_stop(void) * smp_mb() // B smp_wmb(); // C * wq_entry->flags |= WQ_FLAG_WOKEN; * } - * remove_wait_queue(&wq, &wait); + * remove_wait_queue(&wq_head, &wait); * */ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) @@ -412,17 +412,17 @@ EXPORT_SYMBOL(wake_bit_function); * permitted return codes. Nonzero return codes halt waiting and return. */ int __sched -__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, +__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q, wait_bit_action_f *action, unsigned mode) { int ret = 0; do { - prepare_to_wait(wq, &q->wait, mode); + prepare_to_wait(wq_head, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(&q->key, mode); } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); - finish_wait(wq, &q->wait); + finish_wait(wq_head, &q->wait); return ret; } EXPORT_SYMBOL(__wait_on_bit); @@ -430,10 +430,10 @@ EXPORT_SYMBOL(__wait_on_bit); int __sched out_of_line_wait_on_bit(void *word, int bit, wait_bit_action_f *action, unsigned mode) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); - return __wait_on_bit(wq, &wait, action, mode); + return __wait_on_bit(wq_head, &wait, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_bit); @@ -441,36 +441,36 @@ int __sched out_of_line_wait_on_bit_timeout( void *word, int bit, wait_bit_action_f *action, unsigned mode, unsigned long timeout) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); wait.key.timeout = jiffies + timeout; - return __wait_on_bit(wq, &wait, action, mode); + return __wait_on_bit(wq_head, &wait, action, mode); } EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); int __sched -__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, +__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q, wait_bit_action_f *action, unsigned mode) { int ret = 0; for (;;) { - prepare_to_wait_exclusive(wq, &q->wait, mode); + prepare_to_wait_exclusive(wq_head, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) { ret = action(&q->key, mode); /* * See the comment in prepare_to_wait_event(). - * finish_wait() does not necessarily takes wq->lock, + * finish_wait() does not necessarily takes wwq_head->lock, * but test_and_set_bit() implies mb() which pairs with * smp_mb__after_atomic() before wake_up_page(). */ if (ret) - finish_wait(wq, &q->wait); + finish_wait(wq_head, &q->wait); } if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) { if (!ret) - finish_wait(wq, &q->wait); + finish_wait(wq_head, &q->wait); return 0; } else if (ret) { return ret; @@ -482,18 +482,18 @@ EXPORT_SYMBOL(__wait_on_bit_lock); int __sched out_of_line_wait_on_bit_lock(void *word, int bit, wait_bit_action_f *action, unsigned mode) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); - return __wait_on_bit_lock(wq, &wait, action, mode); + return __wait_on_bit_lock(wq_head, &wait, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); -void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) +void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit) { struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); - if (waitqueue_active(wq)) - __wake_up(wq, TASK_NORMAL, 1, &key); + if (waitqueue_active(wq_head)) + __wake_up(wq_head, TASK_NORMAL, 1, &key); } EXPORT_SYMBOL(__wake_up_bit); @@ -555,20 +555,20 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo * return codes halt waiting and return. */ static __sched -int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, +int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q, int (*action)(atomic_t *), unsigned mode) { atomic_t *val; int ret = 0; do { - prepare_to_wait(wq, &q->wait, mode); + prepare_to_wait(wq_head, &q->wait, mode); val = q->key.flags; if (atomic_read(val) == 0) break; ret = (*action)(val); } while (!ret && atomic_read(val) != 0); - finish_wait(wq, &q->wait); + finish_wait(wq_head, &q->wait); return ret; } @@ -586,10 +586,10 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), unsigned mode) { - wait_queue_head_t *wq = atomic_t_waitqueue(p); + struct wait_queue_head *wq_head = atomic_t_waitqueue(p); DEFINE_WAIT_ATOMIC_T(wait, p); - return __wait_on_atomic_t(wq, &wait, action, mode); + return __wait_on_atomic_t(wq_head, &wait, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); -- cgit v1.2.3 From 2141713616c652aeabf2dd5c1e89bc601c4fed6a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:25:39 +0100 Subject: sched/wait: Standardize 'struct wait_bit_queue' wait-queue entry field name Rename 'struct wait_bit_queue::wait' to ::wq_entry, to more clearly name it as a wait-queue entry. Propagate it to a couple of usage sites where the wait-bit-queue internals are exposed. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/inode.c | 8 ++++---- fs/jbd2/journal.c | 4 ++-- fs/xfs/xfs_icache.c | 4 ++-- fs/xfs/xfs_inode.c | 8 ++++---- include/linux/wait.h | 6 +++--- kernel/sched/wait.c | 41 ++++++++++++++++++++--------------------- 6 files changed, 35 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/fs/inode.c b/fs/inode.c index db5914783a71..70761d6cafcd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1891,11 +1891,11 @@ static void __wait_on_freeing_inode(struct inode *inode) wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); schedule(); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); spin_lock(&inode_hash_lock); } @@ -2038,11 +2038,11 @@ static void __inode_dio_wait(struct inode *inode) DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); do { - prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); if (atomic_read(&inode->i_dio_count)) schedule(); } while (atomic_read(&inode->i_dio_count)); - finish_wait(wq, &q.wait); + finish_wait(wq, &q.wq_entry); } /** diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ebad34266bcf..7d5ef3bf3f3e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2579,10 +2579,10 @@ restart: wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&journal->j_list_lock); schedule(); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); goto restart; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 990210fcb9c3..b9c12e1cc23a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -269,12 +269,12 @@ xfs_inew_wait( DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); do { - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (!xfs_iflags_test(ip, XFS_INEW)) break; schedule(); } while (true); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ec9826c56500..c0a1e840a588 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -622,12 +622,12 @@ __xfs_iflock( DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); do { - prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_isiflocked(ip)) io_schedule(); } while (!xfs_iflock_nowait(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } STATIC uint @@ -2486,11 +2486,11 @@ __xfs_iunpin_wait( xfs_iunpin(ip); do { - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_ipincount(ip)) io_schedule(); } while (xfs_ipincount(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } void diff --git a/include/linux/wait.h b/include/linux/wait.h index c3d1cefc7853..1c8add685f22 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -38,7 +38,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - struct wait_queue_entry wait; + struct wait_queue_entry wq_entry; }; struct wait_queue_head { @@ -991,11 +991,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wait = { \ + .wq_entry = { \ .private = current, \ .func = wake_bit_function, \ .task_list = \ - LIST_HEAD_INIT((name).wait.task_list), \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ }, \ } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 203aeea96f16..f1ba0625b8be 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -395,7 +395,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync { struct wait_bit_key *key = arg; struct wait_bit_queue *wait_bit - = container_of(wq_entry, struct wait_bit_queue, wait); + = container_of(wq_entry, struct wait_bit_queue, wq_entry); if (wait_bit->key.flags != key->flags || wait_bit->key.bit_nr != key->bit_nr || @@ -418,11 +418,11 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q, int ret = 0; do { - prepare_to_wait(wq_head, &q->wait, mode); + prepare_to_wait(wq_head, &q->wq_entry, mode); if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(&q->key, mode); } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); - finish_wait(wq_head, &q->wait); + finish_wait(wq_head, &q->wq_entry); return ret; } EXPORT_SYMBOL(__wait_on_bit); @@ -431,9 +431,9 @@ int __sched out_of_line_wait_on_bit(void *word, int bit, wait_bit_action_f *action, unsigned mode) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wait, word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); - return __wait_on_bit(wq_head, &wait, action, mode); + return __wait_on_bit(wq_head, &wq_entry, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_bit); @@ -442,10 +442,10 @@ int __sched out_of_line_wait_on_bit_timeout( unsigned mode, unsigned long timeout) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wait, word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); - wait.key.timeout = jiffies + timeout; - return __wait_on_bit(wq_head, &wait, action, mode); + wq_entry.key.timeout = jiffies + timeout; + return __wait_on_bit(wq_head, &wq_entry, action, mode); } EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); @@ -456,7 +456,7 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q, int ret = 0; for (;;) { - prepare_to_wait_exclusive(wq_head, &q->wait, mode); + prepare_to_wait_exclusive(wq_head, &q->wq_entry, mode); if (test_bit(q->key.bit_nr, q->key.flags)) { ret = action(&q->key, mode); /* @@ -466,11 +466,11 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q, * smp_mb__after_atomic() before wake_up_page(). */ if (ret) - finish_wait(wq_head, &q->wait); + finish_wait(wq_head, &q->wq_entry); } if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) { if (!ret) - finish_wait(wq_head, &q->wait); + finish_wait(wq_head, &q->wq_entry); return 0; } else if (ret) { return ret; @@ -483,9 +483,9 @@ int __sched out_of_line_wait_on_bit_lock(void *word, int bit, wait_bit_action_f *action, unsigned mode) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wait, word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); - return __wait_on_bit_lock(wq_head, &wait, action, mode); + return __wait_on_bit_lock(wq_head, &wq_entry, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); @@ -538,8 +538,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo void *arg) { struct wait_bit_key *key = arg; - struct wait_bit_queue *wait_bit - = container_of(wq_entry, struct wait_bit_queue, wait); + struct wait_bit_queue *wait_bit = container_of(wq_entry, struct wait_bit_queue, wq_entry); atomic_t *val = key->flags; if (wait_bit->key.flags != key->flags || @@ -562,24 +561,24 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q int ret = 0; do { - prepare_to_wait(wq_head, &q->wait, mode); + prepare_to_wait(wq_head, &q->wq_entry, mode); val = q->key.flags; if (atomic_read(val) == 0) break; ret = (*action)(val); } while (!ret && atomic_read(val) != 0); - finish_wait(wq_head, &q->wait); + finish_wait(wq_head, &q->wq_entry); return ret; } #define DEFINE_WAIT_ATOMIC_T(name, p) \ struct wait_bit_queue name = { \ .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ - .wait = { \ + .wq_entry = { \ .private = current, \ .func = wake_atomic_t_function, \ .task_list = \ - LIST_HEAD_INIT((name).wait.task_list), \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ }, \ } @@ -587,9 +586,9 @@ __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), unsigned mode) { struct wait_queue_head *wq_head = atomic_t_waitqueue(p); - DEFINE_WAIT_ATOMIC_T(wait, p); + DEFINE_WAIT_ATOMIC_T(wq_entry, p); - return __wait_on_atomic_t(wq_head, &wait, action, mode); + return __wait_on_atomic_t(wq_head, &wq_entry, action, mode); } EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); -- cgit v1.2.3 From 76c85ddc4695bb7b8209bfeff11f5156088f9197 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:35:27 +0100 Subject: sched/wait: Standardize wait_bit_queue naming So wait-bit-queue head variables are often named: struct wait_bit_queue *q ... which is a bit ambiguous and super confusing, because they clearly suggest wait-queue head semantics and behavior (they rhyme with the old wait_queue_t *q naming), while they are extended wait-queue _entries_, not heads! They are misnomers in two ways: - the 'wait_bit_queue' leaves open the question of whether it's an entry or a head - the 'q' parameter and local variable naming falsely implies that it's a 'queue' - while it's an entry. This resulted in sometimes confusing cases such as: finish_wait(wq, &q->wait); where the 'q' is not a wait-queue head, but a wait-bit-queue entry. So improve this all by standardizing wait-bit-queue nomenclature similar to wait-queue head naming: struct wait_bit_queue => struct wait_bit_queue_entry q => wbq_entry Which makes it all a much clearer: struct wait_bit_queue_entry *wbq_entry ... and turns the former confusing piece of code into: finish_wait(wq_head, &wbq_entry->wq_entry; which IMHO makes it apparently clear what we are doing, without having to analyze the context of the code: we are adding a wait-queue entry to a regular wait-queue head, which entry is embedded in a wait-bit-queue entry. I'm not a big fan of acronyms, but repeating wait_bit_queue_entry in field and local variable names is too long, so Hopefully it's clear enough that 'wq_' prefixes stand for wait-queues, while 'wbq_' prefixes stand for wait-bit-queues. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 8 ++++---- kernel/sched/wait.c | 41 ++++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1c8add685f22..fc7c32d82120 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -36,7 +36,7 @@ struct wait_bit_key { unsigned long timeout; }; -struct wait_bit_queue { +struct wait_bit_queue_entry { struct wait_bit_key key; struct wait_queue_entry wq_entry; }; @@ -207,8 +207,8 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_bit(struct wait_queue_head *, void *, int); -int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned); +int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned); void wake_up_bit(void *, int); void wake_up_atomic_t(atomic_t *); int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); @@ -989,7 +989,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) #define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue name = { \ + struct wait_bit_queue_entry name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ .wq_entry = { \ .private = current, \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index f1ba0625b8be..95e6d3820cba 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -394,8 +394,7 @@ EXPORT_SYMBOL(woken_wake_function); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) { struct wait_bit_key *key = arg; - struct wait_bit_queue *wait_bit - = container_of(wq_entry, struct wait_bit_queue, wq_entry); + struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); if (wait_bit->key.flags != key->flags || wait_bit->key.bit_nr != key->bit_nr || @@ -412,17 +411,17 @@ EXPORT_SYMBOL(wake_bit_function); * permitted return codes. Nonzero return codes halt waiting and return. */ int __sched -__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue *q, +__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned mode) { int ret = 0; do { - prepare_to_wait(wq_head, &q->wq_entry, mode); - if (test_bit(q->key.bit_nr, q->key.flags)) - ret = (*action)(&q->key, mode); - } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); - finish_wait(wq_head, &q->wq_entry); + prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); + if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) + ret = (*action)(&wbq_entry->key, mode); + } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + finish_wait(wq_head, &wbq_entry->wq_entry); return ret; } EXPORT_SYMBOL(__wait_on_bit); @@ -450,15 +449,15 @@ int __sched out_of_line_wait_on_bit_timeout( EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); int __sched -__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q, +__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned mode) { int ret = 0; for (;;) { - prepare_to_wait_exclusive(wq_head, &q->wq_entry, mode); - if (test_bit(q->key.bit_nr, q->key.flags)) { - ret = action(&q->key, mode); + prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode); + if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { + ret = action(&wbq_entry->key, mode); /* * See the comment in prepare_to_wait_event(). * finish_wait() does not necessarily takes wwq_head->lock, @@ -466,11 +465,11 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue *q, * smp_mb__after_atomic() before wake_up_page(). */ if (ret) - finish_wait(wq_head, &q->wq_entry); + finish_wait(wq_head, &wbq_entry->wq_entry); } - if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) { + if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { if (!ret) - finish_wait(wq_head, &q->wq_entry); + finish_wait(wq_head, &wbq_entry->wq_entry); return 0; } else if (ret) { return ret; @@ -538,7 +537,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo void *arg) { struct wait_bit_key *key = arg; - struct wait_bit_queue *wait_bit = container_of(wq_entry, struct wait_bit_queue, wq_entry); + struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); atomic_t *val = key->flags; if (wait_bit->key.flags != key->flags || @@ -554,25 +553,25 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo * return codes halt waiting and return. */ static __sched -int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue *q, +int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, int (*action)(atomic_t *), unsigned mode) { atomic_t *val; int ret = 0; do { - prepare_to_wait(wq_head, &q->wq_entry, mode); - val = q->key.flags; + prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); + val = wbq_entry->key.flags; if (atomic_read(val) == 0) break; ret = (*action)(val); } while (!ret && atomic_read(val) != 0); - finish_wait(wq_head, &q->wq_entry); + finish_wait(wq_head, &wbq_entry->wq_entry); return ret; } #define DEFINE_WAIT_ATOMIC_T(name, p) \ - struct wait_bit_queue name = { \ + struct wait_bit_queue_entry name = { \ .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ .wq_entry = { \ .private = current, \ -- cgit v1.2.3 From 5dd43ce2f69d42a71dcacdb13d17d8c0ac1fe8f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:19:09 +0200 Subject: sched/wait: Split out the wait_bit*() APIs from into The wait_bit*() types and APIs are mixed into wait.h, but they are a pretty orthogonal extension of wait-queues. Furthermore, only about 50 kernel files use these APIs, while over 1000 use the regular wait-queue functionality. So clean up the main wait.h by moving the wait-bit functionality out of it, into a separate .h and .c file: include/linux/wait_bit.h for types and APIs kernel/sched/wait_bit.c for the implementation Update all header dependencies. This reduces the size of wait.h rather significantly, by about 30%. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/cachefiles/internal.h | 2 +- fs/cifs/inode.c | 1 + fs/nfs/internal.h | 1 + include/linux/fs.h | 2 +- include/linux/sunrpc/sched.h | 2 +- include/linux/wait.h | 250 ---------------------------------------- include/linux/wait_bit.h | 260 ++++++++++++++++++++++++++++++++++++++++++ kernel/sched/Makefile | 2 +- kernel/sched/wait.c | 257 ------------------------------------------ kernel/sched/wait_bit.c | 263 +++++++++++++++++++++++++++++++++++++++++++ security/keys/internal.h | 1 + 11 files changed, 530 insertions(+), 511 deletions(-) create mode 100644 include/linux/wait_bit.h create mode 100644 kernel/sched/wait_bit.c (limited to 'kernel') diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 54a4fcd679ed..bb3a02ca9da4 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4d1fcd76d022..a8693632235f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "cifsfs.h" diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3e24392f2caa..8701d7617964 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -7,6 +7,7 @@ #include #include #include +#include #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..53f7e49d8fe5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,7 +2,7 @@ #define _LINUX_FS_H #include -#include +#include #include #include #include diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..9d7529ffc4ce 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 0805098f3589..629489746f8a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -29,18 +29,6 @@ struct wait_queue_entry { struct list_head task_list; }; -struct wait_bit_key { - void *flags; - int bit_nr; -#define WAIT_ATOMIC_T_BIT_NR -1 - unsigned long timeout; -}; - -struct wait_bit_queue_entry { - struct wait_bit_key key; - struct wait_queue_entry wq_entry; -}; - struct wait_queue_head { spinlock_t lock; struct list_head task_list; @@ -68,12 +56,6 @@ struct task_struct; #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) -#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ - { .flags = word, .bit_nr = bit, } - -#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ - { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } - extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); #define init_waitqueue_head(wq_head) \ @@ -200,22 +182,11 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); -int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -void wake_up_bit(void *word, int bit); -void wake_up_atomic_t(atomic_t *p); -int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); -int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); -struct wait_queue_head *bit_waitqueue(void *word, int bit); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -976,7 +947,6 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); -int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ struct wait_queue_entry name = { \ @@ -987,17 +957,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue_entry name = { \ - .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wq_entry = { \ - .private = current, \ - .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ - }, \ - } - #define init_wait(wait) \ do { \ (wait)->private = current; \ @@ -1006,213 +965,4 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync (wait)->flags = 0; \ } while (0) - -extern int bit_wait(struct wait_bit_key *key, int bit); -extern int bit_wait_io(struct wait_bit_key *key, int bit); -extern int bit_wait_timeout(struct wait_bit_key *key, int bit); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); - -/** - * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait, - mode); -} - -/** - * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait_io, - mode); -} - -/** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * @timeout: timeout, in jiffies - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. - * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. - */ -static inline int -wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, - unsigned long timeout) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_timeout(word, bit, - bit_wait_timeout, - mode, timeout); -} - -/** - * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, action, mode); -} - -/** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); -} - -/** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); -} - -/** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, action, mode); -} - -/** - * wait_on_atomic_t - Wait for an atomic_t to become 0 - * @val: The atomic value being waited on, a kernel virtual address - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for - * the purpose of getting a waitqueue, but we set the key to a bit number - * outside of the target 'word'. - */ -static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) -{ - might_sleep(); - if (atomic_read(val) == 0) - return 0; - return out_of_line_wait_on_atomic_t(val, action, mode); -} - #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h new file mode 100644 index 000000000000..8c85c52d94b6 --- /dev/null +++ b/include/linux/wait_bit.h @@ -0,0 +1,260 @@ +#ifndef _LINUX_WAIT_BIT_H +#define _LINUX_WAIT_BIT_H + +/* + * Linux wait-bit related types and methods: + */ +#include + +struct wait_bit_key { + void *flags; + int bit_nr; +#define WAIT_ATOMIC_T_BIT_NR -1 + unsigned long timeout; +}; + +struct wait_bit_queue_entry { + struct wait_bit_key key; + struct wait_queue_entry wq_entry; +}; + +#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ + { .flags = word, .bit_nr = bit, } + +#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ + { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } + +typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); +void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); +int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); +void wake_up_bit(void *word, int bit); +void wake_up_atomic_t(atomic_t *p); +int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +struct wait_queue_head *bit_waitqueue(void *word, int bit); + +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); + +#define DEFINE_WAIT_BIT(name, word, bit) \ + struct wait_bit_queue_entry name = { \ + .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ + .wq_entry = { \ + .private = current, \ + .func = wake_bit_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ + }, \ + } + +extern int bit_wait(struct wait_bit_key *key, int bit); +extern int bit_wait_io(struct wait_bit_key *key, int bit); +extern int bit_wait_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); + +/** + * wait_on_bit - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit. + * For instance, if one were to have waiters on a bitflag, one would + * call wait_on_bit() in threads waiting for the bit to clear. + * One uses wait_on_bit() where one is waiting for the bit to clear, + * but has no intention of setting it. + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait, + mode); +} + +/** + * wait_on_bit_io - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), but calls + * io_schedule() instead of schedule() for the actual waiting. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait_io, + mode); +} + +/** + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * @timeout: timeout, in jiffies + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), except also takes a + * timeout parameter. + * + * Returned value will be zero if the bit was cleared before the + * @timeout elapsed, or non-zero if the @timeout elapsed or process + * received a signal and the mode permitted wakeup on that signal. + */ +static inline int +wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, + unsigned long timeout) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_timeout(word, bit, + bit_wait_timeout, + mode, timeout); +} + +/** + * wait_on_bit_action - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared, and allow the waiting action to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, action, mode); +} + +/** + * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit + * when one intends to set it, for instance, trying to lock bitflags. + * For instance, if one were to have waiters trying to set bitflag + * and waiting for it to clear before setting it, one would call + * wait_on_bit() in threads waiting to be able to set the bit. + * One uses wait_on_bit_lock() where one is waiting for the bit to + * clear with the intention of setting it, and when done, clearing it. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); +} + +/** + * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to atomically set it. This is similar + * to wait_on_bit(), but calls io_schedule() instead of schedule() + * for the actual waiting. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); +} + +/** + * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to set it, and allow the waiting action + * to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, action, mode); +} + +/** + * wait_on_atomic_t - Wait for an atomic_t to become 0 + * @val: The atomic value being waited on, a kernel virtual address + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for + * the purpose of getting a waitqueue, but we set the key to a bit number + * outside of the target 'word'. + */ +static inline +int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) +{ + might_sleep(); + if (atomic_read(val) == 0) + return 0; + return out_of_line_wait_on_atomic_t(val, action, mode); +} + +#endif /* _LINUX_WAIT_BIT_H */ diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 89ab6758667b..16277e2ed8ee 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -17,7 +17,7 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o -obj-y += wait.o swait.o completion.o idle.o +obj-y += wait.o wait_bit.o swait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 95e6d3820cba..6bcd7c3c4501 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -390,260 +390,3 @@ int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sy return default_wake_function(wq_entry, mode, sync, key); } EXPORT_SYMBOL(woken_wake_function); - -int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) -{ - struct wait_bit_key *key = arg; - struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); - - if (wait_bit->key.flags != key->flags || - wait_bit->key.bit_nr != key->bit_nr || - test_bit(key->bit_nr, key->flags)) - return 0; - else - return autoremove_wake_function(wq_entry, mode, sync, key); -} -EXPORT_SYMBOL(wake_bit_function); - -/* - * To allow interruptible waiting and asynchronous (i.e. nonblocking) - * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are - * permitted return codes. Nonzero return codes halt waiting and return. - */ -int __sched -__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, - wait_bit_action_f *action, unsigned mode) -{ - int ret = 0; - - do { - prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); - if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) - ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); - finish_wait(wq_head, &wbq_entry->wq_entry); - return ret; -} -EXPORT_SYMBOL(__wait_on_bit); - -int __sched out_of_line_wait_on_bit(void *word, int bit, - wait_bit_action_f *action, unsigned mode) -{ - struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wq_entry, word, bit); - - return __wait_on_bit(wq_head, &wq_entry, action, mode); -} -EXPORT_SYMBOL(out_of_line_wait_on_bit); - -int __sched out_of_line_wait_on_bit_timeout( - void *word, int bit, wait_bit_action_f *action, - unsigned mode, unsigned long timeout) -{ - struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wq_entry, word, bit); - - wq_entry.key.timeout = jiffies + timeout; - return __wait_on_bit(wq_head, &wq_entry, action, mode); -} -EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); - -int __sched -__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, - wait_bit_action_f *action, unsigned mode) -{ - int ret = 0; - - for (;;) { - prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode); - if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { - ret = action(&wbq_entry->key, mode); - /* - * See the comment in prepare_to_wait_event(). - * finish_wait() does not necessarily takes wwq_head->lock, - * but test_and_set_bit() implies mb() which pairs with - * smp_mb__after_atomic() before wake_up_page(). - */ - if (ret) - finish_wait(wq_head, &wbq_entry->wq_entry); - } - if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { - if (!ret) - finish_wait(wq_head, &wbq_entry->wq_entry); - return 0; - } else if (ret) { - return ret; - } - } -} -EXPORT_SYMBOL(__wait_on_bit_lock); - -int __sched out_of_line_wait_on_bit_lock(void *word, int bit, - wait_bit_action_f *action, unsigned mode) -{ - struct wait_queue_head *wq_head = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wq_entry, word, bit); - - return __wait_on_bit_lock(wq_head, &wq_entry, action, mode); -} -EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); - -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit) -{ - struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); - if (waitqueue_active(wq_head)) - __wake_up(wq_head, TASK_NORMAL, 1, &key); -} -EXPORT_SYMBOL(__wake_up_bit); - -/** - * wake_up_bit - wake up a waiter on a bit - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that wakes up waiters - * on a bit. For instance, if one were to have waiters on a bitflag, - * one would call wake_up_bit() after clearing the bit. - * - * In order for this to function properly, as it uses waitqueue_active() - * internally, some kind of memory barrier must be done prior to calling - * this. Typically, this will be smp_mb__after_atomic(), but in some - * cases where bitflags are manipulated non-atomically under a lock, one - * may need to use a less regular barrier, such fs/inode.c's smp_mb(), - * because spin_unlock() does not guarantee a memory barrier. - */ -void wake_up_bit(void *word, int bit) -{ - __wake_up_bit(bit_waitqueue(word, bit), word, bit); -} -EXPORT_SYMBOL(wake_up_bit); - -/* - * Manipulate the atomic_t address to produce a better bit waitqueue table hash - * index (we're keying off bit -1, but that would produce a horrible hash - * value). - */ -static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) -{ - if (BITS_PER_LONG == 64) { - unsigned long q = (unsigned long)p; - return bit_waitqueue((void *)(q & ~1), q & 1); - } - return bit_waitqueue(p, 0); -} - -static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, - void *arg) -{ - struct wait_bit_key *key = arg; - struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); - atomic_t *val = key->flags; - - if (wait_bit->key.flags != key->flags || - wait_bit->key.bit_nr != key->bit_nr || - atomic_read(val) != 0) - return 0; - return autoremove_wake_function(wq_entry, mode, sync, key); -} - -/* - * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting, - * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero - * return codes halt waiting and return. - */ -static __sched -int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, - int (*action)(atomic_t *), unsigned mode) -{ - atomic_t *val; - int ret = 0; - - do { - prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); - val = wbq_entry->key.flags; - if (atomic_read(val) == 0) - break; - ret = (*action)(val); - } while (!ret && atomic_read(val) != 0); - finish_wait(wq_head, &wbq_entry->wq_entry); - return ret; -} - -#define DEFINE_WAIT_ATOMIC_T(name, p) \ - struct wait_bit_queue_entry name = { \ - .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ - .wq_entry = { \ - .private = current, \ - .func = wake_atomic_t_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ - }, \ - } - -__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), - unsigned mode) -{ - struct wait_queue_head *wq_head = atomic_t_waitqueue(p); - DEFINE_WAIT_ATOMIC_T(wq_entry, p); - - return __wait_on_atomic_t(wq_head, &wq_entry, action, mode); -} -EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); - -/** - * wake_up_atomic_t - Wake up a waiter on a atomic_t - * @p: The atomic_t being waited on, a kernel virtual address - * - * Wake up anyone waiting for the atomic_t to go to zero. - * - * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t - * check is done by the waiter's wake function, not the by the waker itself). - */ -void wake_up_atomic_t(atomic_t *p) -{ - __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); -} -EXPORT_SYMBOL(wake_up_atomic_t); - -__sched int bit_wait(struct wait_bit_key *word, int mode) -{ - schedule(); - if (signal_pending_state(mode, current)) - return -EINTR; - return 0; -} -EXPORT_SYMBOL(bit_wait); - -__sched int bit_wait_io(struct wait_bit_key *word, int mode) -{ - io_schedule(); - if (signal_pending_state(mode, current)) - return -EINTR; - return 0; -} -EXPORT_SYMBOL(bit_wait_io); - -__sched int bit_wait_timeout(struct wait_bit_key *word, int mode) -{ - unsigned long now = READ_ONCE(jiffies); - if (time_after_eq(now, word->timeout)) - return -EAGAIN; - schedule_timeout(word->timeout - now); - if (signal_pending_state(mode, current)) - return -EINTR; - return 0; -} -EXPORT_SYMBOL_GPL(bit_wait_timeout); - -__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) -{ - unsigned long now = READ_ONCE(jiffies); - if (time_after_eq(now, word->timeout)) - return -EAGAIN; - io_schedule_timeout(word->timeout - now); - if (signal_pending_state(mode, current)) - return -EINTR; - return 0; -} -EXPORT_SYMBOL_GPL(bit_wait_io_timeout); diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c new file mode 100644 index 000000000000..463bac84dfd1 --- /dev/null +++ b/kernel/sched/wait_bit.c @@ -0,0 +1,263 @@ +/* + * The implementation of the wait_bit*() and related waiting APIs: + */ +#include +#include +#include + +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) +{ + struct wait_bit_key *key = arg; + struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); + + if (wait_bit->key.flags != key->flags || + wait_bit->key.bit_nr != key->bit_nr || + test_bit(key->bit_nr, key->flags)) + return 0; + else + return autoremove_wake_function(wq_entry, mode, sync, key); +} +EXPORT_SYMBOL(wake_bit_function); + +/* + * To allow interruptible waiting and asynchronous (i.e. nonblocking) + * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are + * permitted return codes. Nonzero return codes halt waiting and return. + */ +int __sched +__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, + wait_bit_action_f *action, unsigned mode) +{ + int ret = 0; + + do { + prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); + if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) + ret = (*action)(&wbq_entry->key, mode); + } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + finish_wait(wq_head, &wbq_entry->wq_entry); + return ret; +} +EXPORT_SYMBOL(__wait_on_bit); + +int __sched out_of_line_wait_on_bit(void *word, int bit, + wait_bit_action_f *action, unsigned mode) +{ + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); + + return __wait_on_bit(wq_head, &wq_entry, action, mode); +} +EXPORT_SYMBOL(out_of_line_wait_on_bit); + +int __sched out_of_line_wait_on_bit_timeout( + void *word, int bit, wait_bit_action_f *action, + unsigned mode, unsigned long timeout) +{ + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); + + wq_entry.key.timeout = jiffies + timeout; + return __wait_on_bit(wq_head, &wq_entry, action, mode); +} +EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); + +int __sched +__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, + wait_bit_action_f *action, unsigned mode) +{ + int ret = 0; + + for (;;) { + prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode); + if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { + ret = action(&wbq_entry->key, mode); + /* + * See the comment in prepare_to_wait_event(). + * finish_wait() does not necessarily takes wwq_head->lock, + * but test_and_set_bit() implies mb() which pairs with + * smp_mb__after_atomic() before wake_up_page(). + */ + if (ret) + finish_wait(wq_head, &wbq_entry->wq_entry); + } + if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) { + if (!ret) + finish_wait(wq_head, &wbq_entry->wq_entry); + return 0; + } else if (ret) { + return ret; + } + } +} +EXPORT_SYMBOL(__wait_on_bit_lock); + +int __sched out_of_line_wait_on_bit_lock(void *word, int bit, + wait_bit_action_f *action, unsigned mode) +{ + struct wait_queue_head *wq_head = bit_waitqueue(word, bit); + DEFINE_WAIT_BIT(wq_entry, word, bit); + + return __wait_on_bit_lock(wq_head, &wq_entry, action, mode); +} +EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); + +void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit) +{ + struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); + if (waitqueue_active(wq_head)) + __wake_up(wq_head, TASK_NORMAL, 1, &key); +} +EXPORT_SYMBOL(__wake_up_bit); + +/** + * wake_up_bit - wake up a waiter on a bit + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that wakes up waiters + * on a bit. For instance, if one were to have waiters on a bitflag, + * one would call wake_up_bit() after clearing the bit. + * + * In order for this to function properly, as it uses waitqueue_active() + * internally, some kind of memory barrier must be done prior to calling + * this. Typically, this will be smp_mb__after_atomic(), but in some + * cases where bitflags are manipulated non-atomically under a lock, one + * may need to use a less regular barrier, such fs/inode.c's smp_mb(), + * because spin_unlock() does not guarantee a memory barrier. + */ +void wake_up_bit(void *word, int bit) +{ + __wake_up_bit(bit_waitqueue(word, bit), word, bit); +} +EXPORT_SYMBOL(wake_up_bit); + +/* + * Manipulate the atomic_t address to produce a better bit waitqueue table hash + * index (we're keying off bit -1, but that would produce a horrible hash + * value). + */ +static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) +{ + if (BITS_PER_LONG == 64) { + unsigned long q = (unsigned long)p; + return bit_waitqueue((void *)(q & ~1), q & 1); + } + return bit_waitqueue(p, 0); +} + +static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, + void *arg) +{ + struct wait_bit_key *key = arg; + struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry); + atomic_t *val = key->flags; + + if (wait_bit->key.flags != key->flags || + wait_bit->key.bit_nr != key->bit_nr || + atomic_read(val) != 0) + return 0; + return autoremove_wake_function(wq_entry, mode, sync, key); +} + +/* + * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting, + * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero + * return codes halt waiting and return. + */ +static __sched +int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, + int (*action)(atomic_t *), unsigned mode) +{ + atomic_t *val; + int ret = 0; + + do { + prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); + val = wbq_entry->key.flags; + if (atomic_read(val) == 0) + break; + ret = (*action)(val); + } while (!ret && atomic_read(val) != 0); + finish_wait(wq_head, &wbq_entry->wq_entry); + return ret; +} + +#define DEFINE_WAIT_ATOMIC_T(name, p) \ + struct wait_bit_queue_entry name = { \ + .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ + .wq_entry = { \ + .private = current, \ + .func = wake_atomic_t_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ + }, \ + } + +__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), + unsigned mode) +{ + struct wait_queue_head *wq_head = atomic_t_waitqueue(p); + DEFINE_WAIT_ATOMIC_T(wq_entry, p); + + return __wait_on_atomic_t(wq_head, &wq_entry, action, mode); +} +EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); + +/** + * wake_up_atomic_t - Wake up a waiter on a atomic_t + * @p: The atomic_t being waited on, a kernel virtual address + * + * Wake up anyone waiting for the atomic_t to go to zero. + * + * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t + * check is done by the waiter's wake function, not the by the waker itself). + */ +void wake_up_atomic_t(atomic_t *p) +{ + __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); +} +EXPORT_SYMBOL(wake_up_atomic_t); + +__sched int bit_wait(struct wait_bit_key *word, int mode) +{ + schedule(); + if (signal_pending_state(mode, current)) + return -EINTR; + return 0; +} +EXPORT_SYMBOL(bit_wait); + +__sched int bit_wait_io(struct wait_bit_key *word, int mode) +{ + io_schedule(); + if (signal_pending_state(mode, current)) + return -EINTR; + return 0; +} +EXPORT_SYMBOL(bit_wait_io); + +__sched int bit_wait_timeout(struct wait_bit_key *word, int mode) +{ + unsigned long now = READ_ONCE(jiffies); + if (time_after_eq(now, word->timeout)) + return -EAGAIN; + schedule_timeout(word->timeout - now); + if (signal_pending_state(mode, current)) + return -EINTR; + return 0; +} +EXPORT_SYMBOL_GPL(bit_wait_timeout); + +__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) +{ + unsigned long now = READ_ONCE(jiffies); + if (time_after_eq(now, word->timeout)) + return -EAGAIN; + io_schedule_timeout(word->timeout - now); + if (signal_pending_state(mode, current)) + return -EINTR; + return 0; +} +EXPORT_SYMBOL_GPL(bit_wait_io_timeout); diff --git a/security/keys/internal.h b/security/keys/internal.h index c0f8682eba69..91bc6214ae57 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -13,6 +13,7 @@ #define _INTERNAL_H #include +#include #include #include #include -- cgit v1.2.3 From 5822a454d6d22297c5fcd66264120587b2ec21cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 13:09:07 +0100 Subject: sched/wait: Move bit_wait_table[] and related functionality from sched/core.c to sched/wait_bit.c The key hashed waitqueue data structures and their initialization was done in the main scheduler file for no good reason, move them to sched/wait_bit.c instead. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait_bit.h | 1 + kernel/sched/core.c | 18 ++---------------- kernel/sched/wait_bit.c | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 8c85c52d94b6..9cc82114dbcb 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -35,6 +35,7 @@ int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); +extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5b36644536ab..e7b9ef8df126 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -6026,28 +6027,13 @@ static struct kmem_cache *task_group_cache __read_mostly; DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); -#define WAIT_TABLE_BITS 8 -#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS) -static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned; - -wait_queue_head_t *bit_waitqueue(void *word, int bit) -{ - const int shift = BITS_PER_LONG == 32 ? 5 : 6; - unsigned long val = (unsigned long)word << shift | bit; - - return bit_wait_table + hash_long(val, WAIT_TABLE_BITS); -} -EXPORT_SYMBOL(bit_waitqueue); - void __init sched_init(void) { int i, j; unsigned long alloc_size = 0, ptr; sched_clock_init(); - - for (i = 0; i < WAIT_TABLE_SIZE; i++) - init_waitqueue_head(bit_wait_table + i); + wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED alloc_size += 2 * nr_cpu_ids * sizeof(void **); diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index 463bac84dfd1..c891b34e1896 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -4,6 +4,21 @@ #include #include #include +#include + +#define WAIT_TABLE_BITS 8 +#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS) + +static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned; + +wait_queue_head_t *bit_waitqueue(void *word, int bit) +{ + const int shift = BITS_PER_LONG == 32 ? 5 : 6; + unsigned long val = (unsigned long)word << shift | bit; + + return bit_wait_table + hash_long(val, WAIT_TABLE_BITS); +} +EXPORT_SYMBOL(bit_waitqueue); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg) { @@ -261,3 +276,11 @@ __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) return 0; } EXPORT_SYMBOL_GPL(bit_wait_io_timeout); + +void __init wait_bit_init(void) +{ + int i; + + for (i = 0; i < WAIT_TABLE_SIZE; i++) + init_waitqueue_head(bit_wait_table + i); +} -- cgit v1.2.3 From 2055da97389a605c8a00d163d40903afbe413921 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:06:46 +0200 Subject: sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming So I've noticed a number of instances where it was not obvious from the code whether ->task_list was for a wait-queue head or a wait-queue entry. Furthermore, there's a number of wait-queue users where the lists are not for 'tasks' but other entities (poll tables, etc.), in which case the 'task_list' name is actively confusing. To clear this all up, name the wait-queue head and entry list structure fields unambiguously: struct wait_queue_head::task_list => ::head struct wait_queue_entry::task_list => ::entry For example, this code: rqw->wait.task_list.next != &wait->task_list ... is was pretty unclear (to me) what it's doing, while now it's written this way: rqw->wait.head.next != &wait->entry ... which makes it pretty clear that we are iterating a list until we see the head. Other examples are: list_for_each_entry_safe(pos, next, &x->task_list, task_list) { list_for_each_entry(wq, &fence->wait.task_list, task_list) { ... where it's unclear (to me) what we are iterating, and during review it's hard to tell whether it's trying to walk a wait-queue entry (which would be a bug), while now it's written as: list_for_each_entry_safe(pos, next, &x->head, entry) { list_for_each_entry(wq, &fence->wait.head, entry) { Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- block/blk-mq.c | 2 +- block/blk-wbt.c | 2 +- block/kyber-iosched.c | 8 ++++---- drivers/gpu/drm/i915/i915_sw_fence.c | 21 ++++++++++----------- drivers/rtc/rtc-imxdi.c | 2 +- fs/cachefiles/rdwr.c | 2 +- fs/eventpoll.c | 2 +- fs/fs_pin.c | 2 +- fs/nilfs2/segment.c | 3 +-- fs/orangefs/orangefs-bufmap.c | 8 ++++---- fs/userfaultfd.c | 22 +++++++++++----------- include/linux/wait.h | 20 ++++++++++---------- include/linux/wait_bit.h | 4 ++-- kernel/sched/wait.c | 24 ++++++++++++------------ kernel/sched/wait_bit.c | 4 ++-- mm/filemap.c | 2 +- mm/memcontrol.c | 2 +- mm/shmem.c | 4 ++-- 18 files changed, 66 insertions(+), 68 deletions(-) (limited to 'kernel') diff --git a/block/blk-mq.c b/block/blk-mq.c index a083f95e04b1..121aa1dbb192 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -933,7 +933,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int fla hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); - list_del(&wait->task_list); + list_del(&wait->entry); clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state); blk_mq_run_hw_queue(hctx, true); return 1; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 5f3a37c2784c..6a9a0f03a67b 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -520,7 +520,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, * in line to be woken up, wait for our turn. */ if (waitqueue_active(&rqw->wait) && - rqw->wait.task_list.next != &wait->task_list) + rqw->wait.head.next != &wait->entry) return false; return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b95d6bd714c0..9bf1484365b2 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -385,7 +385,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) for (i = 0; i < KYBER_NUM_DOMAINS; i++) { INIT_LIST_HEAD(&khd->rqs[i]); - INIT_LIST_HEAD(&khd->domain_wait[i].task_list); + INIT_LIST_HEAD(&khd->domain_wait[i].entry); atomic_set(&khd->wait_index[i], 0); } @@ -512,7 +512,7 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, { struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private); - list_del_init(&wait->task_list); + list_del_init(&wait->entry); blk_mq_run_hw_queue(hctx, true); return 1; } @@ -536,7 +536,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, * run when one becomes available. Note that this is serialized on * khd->lock, but we still need to be careful about the waker. */ - if (list_empty_careful(&wait->task_list)) { + if (list_empty_careful(&wait->entry)) { init_waitqueue_func_entry(wait, kyber_domain_wake); wait->private = hctx; ws = sbq_wait_ptr(domain_tokens, @@ -736,7 +736,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ struct kyber_hctx_data *khd = hctx->sched_data; \ wait_queue_entry_t *wait = &khd->domain_wait[domain]; \ \ - seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \ + seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \ return 0; \ } KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 8669bfa33064..380de4360b8a 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -160,31 +160,30 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, /* * To prevent unbounded recursion as we traverse the graph of - * i915_sw_fences, we move the task_list from this, the next ready - * fence, to the tail of the original fence's task_list + * i915_sw_fences, we move the entry list from this, the next ready + * fence, to the tail of the original fence's entry list * (and so added to the list to be woken). */ spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation); if (continuation) { - list_for_each_entry_safe(pos, next, &x->task_list, task_list) { + list_for_each_entry_safe(pos, next, &x->head, entry) { if (pos->func == autoremove_wake_function) pos->func(pos, TASK_NORMAL, 0, continuation); else - list_move_tail(&pos->task_list, continuation); + list_move_tail(&pos->entry, continuation); } } else { LIST_HEAD(extra); do { - list_for_each_entry_safe(pos, next, - &x->task_list, task_list) + list_for_each_entry_safe(pos, next, &x->head, entry) pos->func(pos, TASK_NORMAL, 0, &extra); if (list_empty(&extra)) break; - list_splice_tail_init(&extra, &x->task_list); + list_splice_tail_init(&extra, &x->head); } while (1); } spin_unlock_irqrestore(&x->lock, flags); @@ -256,7 +255,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence) static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key) { - list_del(&wq->task_list); + list_del(&wq->entry); __i915_sw_fence_complete(wq->private, key); i915_sw_fence_put(wq->private); if (wq->flags & I915_SW_FENCE_FLAG_ALLOC) @@ -275,7 +274,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, if (fence == signaler) return true; - list_for_each_entry(wq, &fence->wait.task_list, task_list) { + list_for_each_entry(wq, &fence->wait.head, entry) { if (wq->func != i915_sw_fence_wake) continue; @@ -293,7 +292,7 @@ static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence) if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags)) return; - list_for_each_entry(wq, &fence->wait.task_list, task_list) { + list_for_each_entry(wq, &fence->wait.head, entry) { if (wq->func != i915_sw_fence_wake) continue; @@ -350,7 +349,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, pending |= I915_SW_FENCE_FLAG_ALLOC; } - INIT_LIST_HEAD(&wq->task_list); + INIT_LIST_HEAD(&wq->entry); wq->flags = pending; wq->func = i915_sw_fence_wake; wq->private = i915_sw_fence_get(fence); diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 6b54f6c24c5f..80931114c899 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -709,7 +709,7 @@ static irqreturn_t dryice_irq(int irq, void *dev_id) /*If the write wait queue is empty then there is no pending operations. It means the interrupt is for DryIce -Security. IRQ must be returned as none.*/ - if (list_empty_careful(&imxdi->write_wait.task_list)) + if (list_empty_careful(&imxdi->write_wait.head)) return rc; /* DSR_WCF clears itself on DSR read */ diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8be33b33b981..18d7aa61ef0f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, } /* remove from the waitqueue */ - list_del(&wait->task_list); + list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ ASSERT(monitor->op); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5ac1cba5ef72..b1c8e23ddf65 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * can't use __remove_wait_queue(). whead->lock is held by * the caller. */ - list_del_init(&wait->task_list); + list_del_init(&wait->entry); } spin_lock_irqsave(&ep->lock, flags); diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 7b447a245760..e747b3d720ee 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p) rcu_read_unlock(); schedule(); rcu_read_lock(); - if (likely(list_empty(&wait.task_list))) + if (likely(list_empty(&wait.entry))) break; /* OK, we know p couldn't have been freed yet */ spin_lock_irq(&p->wait.lock); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 775304e7f96f..70ded52dc1dd 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) unsigned long flags; spin_lock_irqsave(&sci->sc_wait_request.lock, flags); - list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, - wq.task_list) { + list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { wrq->err = err; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 9e37b7028ea4..038d67545d9f 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -46,7 +46,7 @@ static void run_down(struct slot_map *m) spin_lock(&m->q.lock); if (m->c != -1) { for (;;) { - if (likely(list_empty(&wait.task_list))) + if (likely(list_empty(&wait.entry))) __add_wait_queue_entry_tail(&m->q, &wait); set_current_state(TASK_UNINTERRUPTIBLE); @@ -84,7 +84,7 @@ static int wait_for_free(struct slot_map *m) do { long n = left, t; - if (likely(list_empty(&wait.task_list))) + if (likely(list_empty(&wait.entry))) __add_wait_queue_entry_tail_exclusive(&m->q, &wait); set_current_state(TASK_INTERRUPTIBLE); @@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m) left = -EINTR; } while (left > 0); - if (!list_empty(&wait.task_list)) - list_del(&wait.task_list); + if (!list_empty(&wait.entry)) + list_del(&wait.entry); else if (left <= 0 && waitqueue_active(&m->q)) __wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL); __set_current_state(TASK_RUNNING); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index bda64fcd8a0c..6148ccd6cccf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, * wouldn't be enough, the smp_mb__before_spinlock is * enough to avoid an explicit smp_mb() here. */ - list_del_init(&wq->task_list); + list_del_init(&wq->entry); out: return ret; } @@ -522,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * and it's fine not to block on the spinlock. The uwq on this * kernel stack can be released after the list_del_init. */ - if (!list_empty_careful(&uwq.wq.task_list)) { + if (!list_empty_careful(&uwq.wq.entry)) { spin_lock(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ - list_del(&uwq.wq.task_list); + list_del(&uwq.wq.entry); spin_unlock(&ctx->fault_pending_wqh.lock); } @@ -869,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in( if (!waitqueue_active(wqh)) goto out; /* walk in reverse to provide FIFO behavior to read userfaults */ - wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list); + wq = list_last_entry(&wqh->head, typeof(*wq), entry); uwq = container_of(wq, struct userfaultfd_wait_queue, wq); out: return uwq; @@ -1003,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, * changes __remove_wait_queue() to use * list_del_init() in turn breaking the * !list_empty_careful() check in - * handle_userfault(). The uwq->wq.task_list + * handle_userfault(). The uwq->wq.head list * must never be empty at any time during the * refile, or the waitqueue could disappear * from under us. The "wait_queue_head_t" * parameter of __remove_wait_queue() is unused * anyway. */ - list_del(&uwq->wq.task_list); + list_del(&uwq->wq.entry); __add_wait_queue(&ctx->fault_wqh, &uwq->wq); write_seqcount_end(&ctx->refile_seq); @@ -1032,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, fork_nctx = (struct userfaultfd_ctx *) (unsigned long) uwq->msg.arg.reserved.reserved1; - list_move(&uwq->wq.task_list, &fork_event); + list_move(&uwq->wq.entry, &fork_event); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; @@ -1069,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!list_empty(&fork_event)) { uwq = list_first_entry(&fork_event, typeof(*uwq), - wq.task_list); - list_del(&uwq->wq.task_list); + wq.entry); + list_del(&uwq->wq.entry); __add_wait_queue(&ctx->event_wqh, &uwq->wq); userfaultfd_event_complete(ctx, uwq); } @@ -1752,12 +1752,12 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) unsigned long pending = 0, total = 0; spin_lock(&ctx->fault_pending_wqh.lock); - list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) { + list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { uwq = container_of(wq, struct userfaultfd_wait_queue, wq); pending++; total++; } - list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) { + list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { uwq = container_of(wq, struct userfaultfd_wait_queue, wq); total++; } diff --git a/include/linux/wait.h b/include/linux/wait.h index 629489746f8a..b289c96151ee 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -26,12 +26,12 @@ struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; - struct list_head task_list; + struct list_head entry; }; struct wait_queue_head { spinlock_t lock; - struct list_head task_list; + struct list_head head; }; typedef struct wait_queue_head wait_queue_head_t; @@ -44,14 +44,14 @@ struct task_struct; #define __WAITQUEUE_INITIALIZER(name, tsk) { \ .private = tsk, \ .func = default_wake_function, \ - .task_list = { NULL, NULL } } + .entry = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .task_list = { &(name).task_list, &(name).task_list } } + .head = { &(name).head, &(name).head } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) @@ -121,7 +121,7 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f */ static inline int waitqueue_active(struct wait_queue_head *wq_head) { - return !list_empty(&wq_head->task_list); + return !list_empty(&wq_head->head); } /** @@ -151,7 +151,7 @@ extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->task_list, &wq_head->task_list); + list_add(&wq_entry->entry, &wq_head->head); } /* @@ -166,7 +166,7 @@ __add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_en static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add_tail(&wq_entry->task_list, &wq_head->task_list); + list_add_tail(&wq_entry->entry, &wq_head->head); } static inline void @@ -179,7 +179,7 @@ __add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wa static inline void __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_del(&wq_entry->task_list); + list_del(&wq_entry->entry); } void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); @@ -952,7 +952,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i struct wait_queue_entry name = { \ .private = current, \ .func = function, \ - .task_list = LIST_HEAD_INIT((name).task_list), \ + .entry = LIST_HEAD_INIT((name).entry), \ } #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) @@ -961,7 +961,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i do { \ (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ - INIT_LIST_HEAD(&(wait)->task_list); \ + INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 9cc82114dbcb..12b26660d7e9 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -45,8 +45,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync .wq_entry = { \ .private = current, \ .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ + .entry = \ + LIST_HEAD_INIT((name).wq_entry.entry), \ }, \ } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 6bcd7c3c4501..17f11c6b0a9f 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -16,7 +16,7 @@ void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, st { spin_lock_init(&wq_head->lock); lockdep_set_class_and_name(&wq_head->lock, key, name); - INIT_LIST_HEAD(&wq_head->task_list); + INIT_LIST_HEAD(&wq_head->head); } EXPORT_SYMBOL(__init_waitqueue_head); @@ -68,7 +68,7 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, { wait_queue_entry_t *curr, *next; - list_for_each_entry_safe(curr, next, &wq_head->task_list, task_list) { + list_for_each_entry_safe(curr, next, &wq_head->head, entry) { unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) && @@ -176,7 +176,7 @@ prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_ent wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&wq_head->lock, flags); - if (list_empty(&wq_entry->task_list)) + if (list_empty(&wq_entry->entry)) __add_wait_queue(wq_head, wq_entry); set_current_state(state); spin_unlock_irqrestore(&wq_head->lock, flags); @@ -190,7 +190,7 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent wq_entry->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&wq_head->lock, flags); - if (list_empty(&wq_entry->task_list)) + if (list_empty(&wq_entry->entry)) __add_wait_queue_entry_tail(wq_head, wq_entry); set_current_state(state); spin_unlock_irqrestore(&wq_head->lock, flags); @@ -202,7 +202,7 @@ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) wq_entry->flags = flags; wq_entry->private = current; wq_entry->func = autoremove_wake_function; - INIT_LIST_HEAD(&wq_entry->task_list); + INIT_LIST_HEAD(&wq_entry->entry); } EXPORT_SYMBOL(init_wait_entry); @@ -225,10 +225,10 @@ long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_en * can't see us, it should wake up another exclusive waiter if * we fail. */ - list_del_init(&wq_entry->task_list); + list_del_init(&wq_entry->entry); ret = -ERESTARTSYS; } else { - if (list_empty(&wq_entry->task_list)) { + if (list_empty(&wq_entry->entry)) { if (wq_entry->flags & WQ_FLAG_EXCLUSIVE) __add_wait_queue_entry_tail(wq_head, wq_entry); else @@ -251,7 +251,7 @@ EXPORT_SYMBOL(prepare_to_wait_event); */ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait) { - if (likely(list_empty(&wait->task_list))) + if (likely(list_empty(&wait->entry))) __add_wait_queue_entry_tail(wq, wait); set_current_state(TASK_INTERRUPTIBLE); @@ -267,7 +267,7 @@ EXPORT_SYMBOL(do_wait_intr); int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait) { - if (likely(list_empty(&wait->task_list))) + if (likely(list_empty(&wait->entry))) __add_wait_queue_entry_tail(wq, wait); set_current_state(TASK_INTERRUPTIBLE); @@ -308,9 +308,9 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en * have _one_ other CPU that looks at or modifies * the list). */ - if (!list_empty_careful(&wq_entry->task_list)) { + if (!list_empty_careful(&wq_entry->entry)) { spin_lock_irqsave(&wq_head->lock, flags); - list_del_init(&wq_entry->task_list); + list_del_init(&wq_entry->entry); spin_unlock_irqrestore(&wq_head->lock, flags); } } @@ -321,7 +321,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i int ret = default_wake_function(wq_entry, mode, sync, key); if (ret) - list_del_init(&wq_entry->task_list); + list_del_init(&wq_entry->entry); return ret; } EXPORT_SYMBOL(autoremove_wake_function); diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index c891b34e1896..f8159698aa4d 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -205,8 +205,8 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en .wq_entry = { \ .private = current, \ .func = wake_atomic_t_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ + .entry = \ + LIST_HEAD_INIT((name).wq_entry.entry), \ }, \ } diff --git a/mm/filemap.c b/mm/filemap.c index 80c19ee81e95..926484561624 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -845,7 +845,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, for (;;) { spin_lock_irq(&q->lock); - if (likely(list_empty(&wait->task_list))) { + if (likely(list_empty(&wait->entry))) { if (lock) __add_wait_queue_entry_tail_exclusive(q, wait); else diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a90b096dc6b..d75b38b66ef6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle) owait.wait.flags = 0; owait.wait.func = memcg_oom_wake_function; owait.wait.private = current; - INIT_LIST_HEAD(&owait.wait.task_list); + INIT_LIST_HEAD(&owait.wait.entry); prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); mem_cgroup_mark_under_oom(memcg); diff --git a/mm/shmem.c b/mm/shmem.c index a6c7dece4660..fdc413f82a99 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1905,7 +1905,7 @@ unlock: static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); - list_del_init(&wait->task_list); + list_del_init(&wait->entry); return ret; } @@ -2840,7 +2840,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, spin_lock(&inode->i_lock); inode->i_private = NULL; wake_up_all(&shmem_falloc_waitq); - WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list)); + WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head)); spin_unlock(&inode->i_lock); error = 0; goto out; -- cgit v1.2.3