From 76358ab547357516ec2dace95ae70c1572d6708c Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 18 Dec 2020 14:01:28 -0800 Subject: mm/memcg: bail early from swap accounting if memcg disabled Patch series "bail out early for memcg disable". These 2 patches are independent of the per-memcg lru_lock work and may trigger unexpected warnings, so let's move them out of the per-memcg lru_lock patchset. This patch (of 2): We can bail out early when memcg is not enabled. Link: https://lkml.kernel.org/r/1604283436-18880-1-git-send-email-alex.shi@linux.alibaba.com Link: https://lkml.kernel.org/r/1604283436-18880-2-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e3c7ca7dc174..5c3b054066f5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7178,6 +7178,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); + if (mem_cgroup_disabled()) + return; + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return; @@ -7242,6 +7245,9 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) struct mem_cgroup *memcg; unsigned short oldid; + if (mem_cgroup_disabled()) + return 0; + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; -- cgit v1.2.3 From a4055888629bc0467d12d912cd7c90acdf3d9b12 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 18 Dec 2020 14:01:31 -0800 Subject: mm/memcg: warning on !memcg after readahead page charged Add a VM_WARN_ON_ONCE_PAGE() macro. Since readahead pages are now charged to a memcg too, in theory we no longer need to check for this exception. Before safely removing all of those checks, add a warning for the unexpected !memcg case. Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Michal Hocko Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 13 +++++++++++++ mm/memcontrol.c | 10 ++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..5d0767cb424a 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) @@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...)
BUILD_BUG_ON_INVALID(cond) #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5c3b054066f5..7b9766789a27 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1362,10 +1362,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd } memcg = page_memcg(page); - /* - * Swapcache readahead pages are added to the LRU - and - * possibly migrated - before they are charged. - */ + VM_WARN_ON_ONCE_PAGE(!memcg, page); if (!memcg) memcg = root_mem_cgroup; @@ -6987,6 +6984,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; memcg = page_memcg(oldpage); + VM_WARN_ON_ONCE_PAGE(!memcg, oldpage); if (!memcg) return; @@ -7186,7 +7184,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) memcg = page_memcg(page); - /* Readahead page, never charged */ + VM_WARN_ON_ONCE_PAGE(!memcg, page); if (!memcg) return; @@ -7253,7 +7251,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) memcg = page_memcg(page); - /* Readahead page, never charged */ + VM_WARN_ON_ONCE_PAGE(!memcg, page); if (!memcg) return 0; -- cgit v1.2.3 From bec78efd0061365a76f88e498affd7106b256823 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 18 Dec 2020 14:01:35 -0800 Subject: mm/memcg: remove unused definitions Some definitions are left unused, just clean them. Link: https://lkml.kernel.org/r/20201108003834.12669-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 118 --------------------------------------------- 1 file changed, 118 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 08ed57e02b73..196441f5dc99 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -913,41 +913,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } -/** - * mod_memcg_page_state - update page state statistics - * @page: the page - * @idx: page state item to account - * @val: number of pages (positive or negative) - * - * The @page must be locked or the caller must use lock_page_memcg() - * to prevent double accounting when the page is concurrently being - * moved to another memcg: - * - * lock_page(page) or lock_page_memcg(page) - * if (TestClearPageState(page)) - * mod_memcg_page_state(page, state, -1); - * unlock_page(page) or unlock_page_memcg(page) - * - * Kernel pages are an exception to this, since they'll never move. 
- */ -static inline void __mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - __mod_memcg_state(memcg, idx, val); -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - mod_memcg_state(memcg, idx, val); -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1395,18 +1360,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } -static inline void __mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1479,34 +1432,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) } #endif /* CONFIG_MEMCG */ -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, -1); -} - static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1517,34 +1442,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) __mod_lruvec_kmem_state(p, idx, -1); } -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; @@ -1733,21 +1630,6 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) __memcg_kmem_uncharge_page(page, order); } -static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - return __memcg_kmem_charge(memcg, gfp, nr_pages); - return 0; -} - -static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(memcg, nr_pages); -} - /* * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. 
-- cgit v1.2.3 From 93bb59ca5e97a99922a01a43934c18dc7adfd968 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 18 Dec 2020 14:01:38 -0800 Subject: mm, kvm: account kvm_vcpu_mmap to kmemcg A VCPU of a VM can allocate a couple of pages which can be mmap'ed by the user space application. At the moment this memory is not charged to the memcg of the VMM. On a large machine running a large number of VMs, or a small number of VMs each with many VCPUs, this unaccounted memory can be very significant. So charge this memory to the memcg of the VMM. Please note that the lifetime of these allocations corresponds to the lifetime of the VMM. Link: https://lkml.kernel.org/r/20201106202923.2087414-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Paolo Bonzini Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/x86.c | 2 +- virt/kvm/coalesced_mmio.c | 2 +- virt/kvm/kvm_main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e545a8a613b1..b05aec109242 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9869,7 +9869,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) r = -ENOMEM; - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) goto fail_free_lapic; vcpu->arch.pio_data = page_address(page); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index e2c197fd4f9d..62bd908ecd58 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -111,7 +111,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) { struct page *page; - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) return -ENOMEM; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c4..f69357a29688 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3116,7 +3116,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) { r = -ENOMEM; goto vcpu_free; -- cgit v1.2.3 From 9a1ac2288cf16f9406ca54ef221bfcf262393b15 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 18 Dec 2020 14:01:41 -0800 Subject: mm/memcontrol: rewrite mem_cgroup_page_lruvec() mem_cgroup_page_lruvec() in memcontrol.c and mem_cgroup_lruvec() in memcontrol.h are very similar except for their parameter (page vs. memcg), and the two can be converted to each other. So rewrite mem_cgroup_page_lruvec() in terms of mem_cgroup_lruvec().
[alex.shi@linux.alibaba.com: add missed warning in mem_cgroup_lruvec] Link: https://lkml.kernel.org/r/94f17bb7-ec61-5b72-3555-fabeb5a4d73b@linux.alibaba.com [lstoakes@gmail.com: warn on missing memcg on mem_cgroup_page_lruvec()] Link: https://lkml.kernel.org/r/20201125112202.387009-1-lstoakes@gmail.com Link: https://lkml.kernel.org/r/20201108143731.GA74138@rlk Signed-off-by: Hui Su Signed-off-by: Alex Shi Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Vladimir Davydov Cc: Yafang Shao Cc: Chris Down Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++++++++++-- mm/memcontrol.c | 37 ------------------------------------- 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 196441f5dc99..d827bd7f3bfe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -620,9 +620,10 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec + * @pgdat: pglist_data * * Returns the lru list vector holding pages for a given @memcg & - * @node combination. This can be the node lruvec, if the memory + * @pgdat combination. This can be the node lruvec, if the memory * controller is disabled. */ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, @@ -652,7 +653,21 @@ out: return lruvec; } -struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); +/** + * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page + * @page: the page + * @pgdat: pgdat of the page + * + * This function relies on page->mem_cgroup being stable. + */ +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, + struct pglist_data *pgdat) +{ + struct mem_cgroup *memcg = page_memcg(page); + + VM_WARN_ON_ONCE_PAGE(!memcg, page); + return mem_cgroup_lruvec(memcg, pgdat); +} static inline bool lruvec_holds_page_lru_lock(struct page *page, struct lruvec *lruvec) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7b9766789a27..605f671203ef 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1342,43 +1342,6 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) } #endif -/** - * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page - * @page: the page - * @pgdat: pgdat of the page - * - * This function relies on page's memcg being stable - see the - * access rules in commit_charge(). - */ -struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) -{ - struct mem_cgroup_per_node *mz; - struct mem_cgroup *memcg; - struct lruvec *lruvec; - - if (mem_cgroup_disabled()) { - lruvec = &pgdat->__lruvec; - goto out; - } - - memcg = page_memcg(page); - VM_WARN_ON_ONCE_PAGE(!memcg, page); - if (!memcg) - memcg = root_mem_cgroup; - - mz = mem_cgroup_page_nodeinfo(memcg, page); - lruvec = &mz->lruvec; -out: - /* - * Since a node can be onlined after the mem_cgroup was created, - * we have to be prepared to initialize lruvec->zone here; - * and if offlined then reonlined, we need to reinitialize it. - */ - if (unlikely(lruvec->pgdat != pgdat)) - lruvec->pgdat = pgdat; - return lruvec; -} - /** * lock_page_lruvec - lock and return lruvec for a given page. 
* @page: the page -- cgit v1.2.3 From 289caf5d8f6c61c6d2b7fd752a7f483cd153f182 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:01:44 -0800 Subject: epoll: check for events when removing a timed out thread from the wait queue Patch series "simplify ep_poll". This patch series is a followup based on the suggestions and feedback by Linus: https://lkml.kernel.org/r/CAHk-=wizk=OxUyQPbO8MS41w2Pag1kniUV5WdD5qWL-gq1kjDA@mail.gmail.com The first patch in the series is a fix for the epoll race in the presence of timeouts, so that it can be cleanly backported to all affected stable kernels. The rest of the patch series simplifies the ep_poll() implementation. Some of these simplifications result in minor performance enhancements as well. We have kept these changes under self tests and internal benchmarks for a few days, and there are minor (1-2%) performance enhancements as a result. This patch (of 8): After abc610e01c66 ("fs/epoll: avoid barrier after an epoll_wait(2) timeout"), we break out of the ep_poll loop upon timeout, without checking whether any new events are available. Prior to that patch series we always called ep_events_available() after exiting the loop. This can cause races and missed wakeups. For example, consider the following scenario reported by Guantao Liu: Suppose we have an eventfd added using EPOLLET to an epollfd. Thread 1: Sleeps for just below 5ms and then writes to an eventfd. Thread 2: Calls epoll_wait with a timeout of 5 ms. If it sees an event on the eventfd, it will write back on that fd. Thread 3: Calls epoll_wait with a negative timeout. Prior to abc610e01c66, it is guaranteed that Thread 3 will be woken up by either Thread 1 or Thread 2. After abc610e01c66, Thread 3 can be blocked indefinitely if Thread 2 sees a timeout right before the write to the eventfd by Thread 1. Thread 2 will be woken up from schedule_hrtimeout_range and, with eavail 0, it will not call ep_send_events(). To fix this issue: 1) Simplify the timed_out case as suggested by Linus. 2) While holding the lock, recheck whether the thread was woken up after its timeout expired. Note that (2) is different from Linus' original suggestion: it does not set "eavail = ep_events_available(ep)" to avoid unnecessary contention (when there are too many timed-out threads and a small number of events), as well as races mentioned in the discussion thread. This is the first patch in the series so that the backport to stable releases is straightforward.
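For reference, a minimal userspace sketch of the reported scenario (not part of the patch; the thread roles and the 5 ms values follow the description above, while the helper names and lack of error handling are purely illustrative):

#define _GNU_SOURCE
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

static int epfd, efd;

static void *writer(void *arg)		/* Thread 1 */
{
	uint64_t one = 1;

	usleep(4900);			/* just below 5 ms */
	write(efd, &one, sizeof(one));
	return NULL;
}

static void *relay(void *arg)		/* Thread 2 */
{
	struct epoll_event ev;
	uint64_t one = 1;

	if (epoll_wait(epfd, &ev, 1, 5) > 0)	/* 5 ms timeout */
		write(efd, &one, sizeof(one));	/* write back on the eventfd */
	return NULL;
}

static void *waiter(void *arg)		/* Thread 3 */
{
	struct epoll_event ev;

	/*
	 * Before the fix, this could block forever: Thread 2's timeout can
	 * race with Thread 1's write, so Thread 2 returns 0 without
	 * harvesting the event and never writes back.
	 */
	epoll_wait(epfd, &ev, 1, -1);
	return NULL;
}

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN | EPOLLET };
	pthread_t t1, t2, t3;

	epfd = epoll_create1(0);
	efd = eventfd(0, 0);
	ev.data.fd = efd;
	epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);

	pthread_create(&t3, NULL, waiter, NULL);
	pthread_create(&t2, NULL, relay, NULL);
	pthread_create(&t1, NULL, writer, NULL);

	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	pthread_join(t3, NULL);
	puts("all threads returned");
	return 0;
}

Build with -pthread; with the fix applied, Thread 3 is expected to be woken either directly or by Thread 2's write-back, so the program terminates.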
Link: https://lkml.kernel.org/r/20201106231635.3528496-1-soheil.kdev@gmail.com Link: https://lkml.kernel.org/r/CAHk-=wizk=OxUyQPbO8MS41w2Pag1kniUV5WdD5qWL-gq1kjDA@mail.gmail.com Link: https://lkml.kernel.org/r/20201106231635.3528496-2-soheil.kdev@gmail.com Fixes: abc610e01c66 ("fs/epoll: avoid barrier after an epoll_wait(2) timeout") Signed-off-by: Soheil Hassas Yeganeh Tested-by: Guantao Liu Suggested-by: Linus Torvalds Reported-by: Guantao Liu Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Reviewed-by: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 10b81e69db74..935c66c809ba 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1817,23 +1817,30 @@ fetch_events: } write_unlock_irq(&ep->lock); - if (eavail || res) - break; + if (!eavail && !res) + timed_out = !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { - timed_out = 1; - break; - } - - /* We were woken up, thus go and try to harvest some events */ + /* + * We were woken up, thus go and try to harvest some events. + * If timed out and still on the wait queue, recheck eavail + * carefully under lock, below. + */ eavail = 1; - } while (0); __set_current_state(TASK_RUNNING); if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, it + * means that the thread was woken up after its timeout expired + * before it could reacquire the lock. Thus, when wait.entry is + * empty, it needs to harvest events. + */ + if (timed_out) + eavail = list_empty(&wait.entry); __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } -- cgit v1.2.3 From 2efdaf7660c408d57721cc6dacb0167f866cb451 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:01:48 -0800 Subject: epoll: simplify signal handling Check signals before locking ep->lock, and immediately return -EINTR if there is any signal pending. This saves a few loads, stores, and branches from the hot path and simplifies the loop structure for follow up patches. Link: https://lkml.kernel.org/r/20201106231635.3528496-3-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Linus Torvalds Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 935c66c809ba..4f460be723d0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1733,7 +1733,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res = 0, eavail, timed_out = 0; + int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1780,6 +1780,9 @@ fetch_events: ep_reset_busy_poll_napi_id(ep); do { + if (signal_pending(current)) + return -EINTR; + /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each @@ -1809,15 +1812,12 @@ fetch_events: * important. 
*/ eavail = ep_events_available(ep); - if (!eavail) { - if (signal_pending(current)) - res = -EINTR; - else - __add_wait_queue_exclusive(&ep->wq, &wait); - } + if (!eavail) + __add_wait_queue_exclusive(&ep->wq, &wait); + write_unlock_irq(&ep->lock); - if (!eavail && !res) + if (!eavail) timed_out = !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); @@ -1853,14 +1853,14 @@ send_events: * finding more events available and fetching * repeatedly. */ - res = -EINTR; + return -EINTR; } /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of * more luck. */ - if (!res && eavail && + if (eavail && !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; -- cgit v1.2.3 From cccd29bf0823bdfeb087b7661b06856b1b73bced Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:01:51 -0800 Subject: epoll: pull fatal signal checks into ep_send_events() To simplify the code, pull in checking the fatal signals into ep_send_events(). ep_send_events() is called only from ep_poll(). Note that, previously, we were always checking fatal events, but it is checked only if eavail is true. This should be fine because the goal of that check is to quickly return from epoll_wait() when there is a pending fatal signal. Link: https://lkml.kernel.org/r/20201106231635.3528496-4-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Willem de Bruijn Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4f460be723d0..b9d87745fae6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1625,6 +1625,14 @@ static int ep_send_events(struct eventpoll *ep, poll_table pt; int res = 0; + /* + * Always short-circuit for fatal signals to allow threads to make a + * timely exit without the chance of finding more events available and + * fetching repeatedly. + */ + if (fatal_signal_pending(current)) + return -EINTR; + init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); @@ -1846,15 +1854,6 @@ fetch_events: } send_events: - if (fatal_signal_pending(current)) { - /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. - */ - return -EINTR; - } /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of -- cgit v1.2.3 From e411596d48b5b77632deb91afcbc3185b9b658cb Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:01:54 -0800 Subject: epoll: move eavail next to the list_empty_careful check This is a no-op change and simply to make the code more coherent. 
Link: https://lkml.kernel.org/r/20201106231635.3528496-5-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Linus Torvalds Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b9d87745fae6..d8eef2157587 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1828,6 +1828,7 @@ fetch_events: if (!eavail) timed_out = !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); /* * We were woken up, thus go and try to harvest some events. @@ -1837,8 +1838,6 @@ fetch_events: eavail = 1; } while (0); - __set_current_state(TASK_RUNNING); - if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); /* -- cgit v1.2.3 From 1493c47fb140ddd9e5c291f0c0da3fb03741c766 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:01:57 -0800 Subject: epoll: simplify and optimize busy loop logic ep_events_available() is called multiple times around the busy loop logic, even though the logic is generally not used. ep_reset_busy_poll_napi_id() is similarly always called, even when busy loop is not used. Eliminate ep_reset_busy_poll_napi_id() and inline it inside ep_busy_loop(). Make ep_busy_loop() return whether there are any events available after the busy loop. This will eliminate unnecessary loads and branches, and simplifies the loop. Link: https://lkml.kernel.org/r/20201106231635.3528496-6-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d8eef2157587..03d0ac076a16 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time) * * we must do our busy polling with irqs enabled */ -static void ep_busy_loop(struct eventpoll *ep, int nonblock) +static bool ep_busy_loop(struct eventpoll *ep, int nonblock) { unsigned int napi_id = READ_ONCE(ep->napi_id); - if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) + if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false, BUSY_POLL_BUDGET); -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) -{ - if (ep->napi_id) + if (ep_events_available(ep)) + return true; + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. 
+ */ ep->napi_id = 0; + return false; + } + return false; } /* @@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) #else -static inline void ep_busy_loop(struct eventpoll *ep, int nonblock) -{ -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) +static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) { + return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) @@ -1772,21 +1774,13 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } fetch_events: - - if (!ep_events_available(ep)) - ep_busy_loop(ep, timed_out); - eavail = ep_events_available(ep); + if (!eavail) + eavail = ep_busy_loop(ep, timed_out); + if (eavail) goto send_events; - /* - * Busy poll timed out. Drop NAPI ID for now, we can add - * it back in when we have moved a socket with a valid NAPI - * ID onto the ready list. - */ - ep_reset_busy_poll_napi_id(ep); - do { if (signal_pending(current)) return -EINTR; -- cgit v1.2.3 From e8c85328b1e88f4ee7f84a1fdbff2f2c7965e026 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:02:00 -0800 Subject: epoll: pull all code between fetch_events and send_event into the loop This is a no-op change which simplifies the follow up patches. Link: https://lkml.kernel.org/r/20201106231635.3528496-7-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Linus Torvalds Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 03d0ac076a16..63a7a6e13dfc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1774,14 +1774,14 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } fetch_events: - eavail = ep_events_available(ep); - if (!eavail) - eavail = ep_busy_loop(ep, timed_out); + do { + eavail = ep_events_available(ep); + if (!eavail) + eavail = ep_busy_loop(ep, timed_out); - if (eavail) - goto send_events; + if (eavail) + goto send_events; - do { if (signal_pending(current)) return -EINTR; @@ -1830,21 +1830,22 @@ fetch_events: * carefully under lock, below. */ eavail = 1; - } while (0); - if (!list_empty_careful(&wait.entry)) { - write_lock_irq(&ep->lock); - /* - * If the thread timed out and is not on the wait queue, it - * means that the thread was woken up after its timeout expired - * before it could reacquire the lock. Thus, when wait.entry is - * empty, it needs to harvest events. - */ - if (timed_out) - eavail = list_empty(&wait.entry); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, + * it means that the thread was woken up after its + * timeout expired before it could reacquire the lock. + * Thus, when wait.entry is empty, it needs to harvest + * events. 
+ */ + if (timed_out) + eavail = list_empty(&wait.entry); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + } while (0); send_events: /* -- cgit v1.2.3 From 00b27634bc471e0198f93d48694171121af2e159 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:02:03 -0800 Subject: epoll: replace gotos with a proper loop The existing loop is pointless, and the labels make it really hard to follow the structure. Replace that control structure with a simple loop that returns when there are new events, there is a signal, or the thread has timed out. Link: https://lkml.kernel.org/r/20201106231635.3528496-8-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Linus Torvalds Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 63a7a6e13dfc..1e0030cb805b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1743,7 +1743,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail, timed_out = 0; + int res, eavail = 0, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1769,18 +1769,30 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, write_lock_irq(&ep->lock); eavail = ep_events_available(ep); write_unlock_irq(&ep->lock); - - goto send_events; } -fetch_events: - do { + while (1) { + if (eavail) { + /* + * Try to transfer events to user space. In case we get + * 0 events and there's still timeout left over, we go + * trying again in search of more luck. + */ + res = ep_send_events(ep, events, maxevents); + if (res) + return res; + } + + if (timed_out) + return 0; + eavail = ep_events_available(ep); - if (!eavail) - eavail = ep_busy_loop(ep, timed_out); + if (eavail) + continue; + eavail = ep_busy_loop(ep, timed_out); if (eavail) - goto send_events; + continue; if (signal_pending(current)) return -EINTR; @@ -1845,19 +1857,7 @@ fetch_events: __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } - } while (0); - -send_events: - /* - * Try to transfer events to user space. In case we get 0 events and - * there's still timeout left over, we go trying again in search of - * more luck. - */ - if (eavail && - !(res = ep_send_events(ep, events, maxevents)) && !timed_out) - goto fetch_events; - - return res; + } } /** -- cgit v1.2.3 From e59d3c64cba69b57263dff1d62838bc6a819ae37 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 18 Dec 2020 14:02:06 -0800 Subject: epoll: eliminate unnecessary lock for zero timeout We call ep_events_available() under lock when timeout is 0, and then call it without locks in the loop for the other cases. Instead, call ep_events_available() without lock for all cases. For non-zero timeouts, we will recheck after adding the thread to the wait queue. For zero timeout cases, by definition, user is opportunistically polling and will have to call epoll_wait again in the future. Note that this lock was kept in c5a282e9635e9 because the whole loop was historically under lock. This patch results in a 1% CPU/RPC reduction in RPC benchmarks. 
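For context, the zero-timeout case in question is plain opportunistic polling from userspace, e.g. (a minimal illustrative snippet, not taken from this patch):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	struct epoll_event ev;
	int epfd = epoll_create1(0);

	/*
	 * timeout == 0: epoll_wait() never blocks; a return value of 0 just
	 * means "nothing ready right now", and the caller is expected to
	 * poll again later.
	 */
	int n = epoll_wait(epfd, &ev, 1, 0);

	printf("ready events: %d\n", n);
	close(epfd);
	return 0;
}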
Link: https://lkml.kernel.org/r/20201106231635.3528496-9-soheil.kdev@gmail.com Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Khazhismel Kumykov Cc: Guantao Liu Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1e0030cb805b..9efb553b2b2b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1743,7 +1743,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail = 0, timed_out = 0; + int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1759,18 +1759,21 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } else if (timeout == 0) { /* * Avoid the unnecessary trip to the wait queue loop, if the - * caller specified a non blocking operation. We still need - * lock because we could race and not see an epi being added - * to the ready list while in irq callback. Thus incorrectly - * returning 0 back to userspace. + * caller specified a non blocking operation. */ timed_out = 1; - - write_lock_irq(&ep->lock); - eavail = ep_events_available(ep); - write_unlock_irq(&ep->lock); } + /* + * This call is racy: We may or may not see events that are being added + * to the ready list under the lock (e.g., in IRQ callbacks). For, cases + * with a non-zero timeout, this thread will check the ready list under + * lock and will added to the wait queue. For, cases with a zero + * timeout, the user by definition should not care and will have to + * recheck again. + */ + eavail = ep_events_available(ep); + while (1) { if (eavail) { /* @@ -1786,10 +1789,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, if (timed_out) return 0; - eavail = ep_events_available(ep); - if (eavail) - continue; - eavail = ep_busy_loop(ep, timed_out); if (eavail) continue; -- cgit v1.2.3 From 7cdf7c20e97141eadb05121cc521c8eff47e7d93 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:35 -0800 Subject: epoll: convert internal api to timespec64 Patch series "add epoll_pwait2 syscall", v4. Enable nanosecond timeouts for epoll. Analogous to pselect and ppoll, introduce an epoll_wait syscall variant that takes a struct timespec instead of int timeout. This patch (of 4): Make epoll more consistent with select/poll: pass along the timeout as timespec64 pointer. In anticipation of additional changes affecting all three polling mechanisms: - add epoll_pwait2 syscall with timespec semantics, and share poll_select_set_timeout implementation. - compute slack before conversion to absolute time, to save one ktime_get_ts64 call. 
Link: https://lkml.kernel.org/r/20201121144401.3727659-1-willemdebruijn.kernel@gmail.com Link: https://lkml.kernel.org/r/20201121144401.3727659-2-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Matthew Wilcox (Oracle) Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 57 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9efb553b2b2b..1ba406615991 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1712,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep, return res; } -static inline struct timespec64 ep_set_mstimeout(long ms) +static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { - struct timespec64 now, ts = { - .tv_sec = ms / MSEC_PER_SEC, - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), - }; + struct timespec64 now; + + if (ms < 0) + return NULL; + + if (!ms) { + to->tv_sec = 0; + to->tv_nsec = 0; + return to; + } + + to->tv_sec = ms / MSEC_PER_SEC; + to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); - return timespec64_add_safe(now, ts); + *to = timespec64_add_safe(now, *to); + return to; } /** @@ -1732,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in - * milliseconds. If the @timeout is zero, the function will not block, - * while if the @timeout is less than zero, the function will block + * timespec. If the timeout is zero, the function will not block, + * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * @@ -1741,7 +1751,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout) + int maxevents, struct timespec64 *timeout) { int res, eavail, timed_out = 0; u64 slack = 0; @@ -1750,13 +1760,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, lockdep_assert_irqs_enabled(); - if (timeout > 0) { - struct timespec64 end_time = ep_set_mstimeout(timeout); - - slack = select_estimate_accuracy(&end_time); + if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { + slack = select_estimate_accuracy(timeout); to = &expires; - *to = timespec64_to_ktime(end_time); - } else if (timeout == 0) { + *to = timespec64_to_ktime(*timeout); + } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the * caller specified a non blocking operation. @@ -2175,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) + int maxevents, struct timespec64 *to) { int error; struct fd f; @@ -2209,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, ep = f.file->private_data; /* Time to fish for events ... 
*/ - error = ep_poll(ep, events, maxevents, timeout); + error = ep_poll(ep, events, maxevents, to); error_fput: fdput(f); @@ -2219,7 +2227,10 @@ error_fput: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - return do_epoll_wait(epfd, events, maxevents, timeout); + struct timespec64 to; + + return do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); } /* @@ -2230,6 +2241,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { + struct timespec64 to; int error; /* @@ -2240,7 +2252,9 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, if (error) return error; - error = do_epoll_wait(epfd, events, maxevents, timeout); + error = do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); + restore_saved_sigmask_unless(error == -EINTR); return error; @@ -2253,6 +2267,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { + struct timespec64 to; long err; /* @@ -2263,7 +2278,9 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (err) return err; - err = do_epoll_wait(epfd, events, maxevents, timeout); + err = do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); + restore_saved_sigmask_unless(err == -EINTR); return err; -- cgit v1.2.3 From 58169a52ebc9a733aeb5bea857bc5daa71a301bb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:38 -0800 Subject: epoll: add syscall epoll_pwait2 Add syscall epoll_pwait2, an epoll_wait variant with nsec resolution that replaces int timeout with struct timespec. It is equivalent otherwise. int epoll_pwait2(int fd, struct epoll_event *events, int maxevents, const struct timespec *timeout, const sigset_t *sigset); The underlying hrtimer is already programmed with nsec resolution. pselect and ppoll also set nsec resolution timeout with timespec. The sigset_t in epoll_pwait has a compat variant. epoll_pwait2 needs the same. For timespec, only support this new interface on 2038 aware platforms that define __kernel_timespec_t. So no CONFIG_COMPAT_32BIT_TIME. Link: https://lkml.kernel.org/r/20201121144401.3727659-3-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1ba406615991..a829af074eb5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2237,11 +2237,10 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). 
*/ -SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) +static int do_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *to, + const sigset_t __user *sigmask, size_t sigsetsize) { - struct timespec64 to; int error; /* @@ -2252,22 +2251,48 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, if (error) return error; - error = do_epoll_wait(epfd, events, maxevents, - ep_timeout_to_timespec(&to, timeout)); + error = do_epoll_wait(epfd, events, maxevents, to); restore_saved_sigmask_unless(error == -EINTR); return error; } -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, - struct epoll_event __user *, events, - int, maxevents, int, timeout, - const compat_sigset_t __user *, sigmask, - compat_size_t, sigsetsize) +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) { struct timespec64 to; + + return do_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, + int, maxevents, const struct __kernel_timespec __user *, timeout, + const sigset_t __user *, sigmask, size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + +#ifdef CONFIG_COMPAT +static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) +{ long err; /* @@ -2278,13 +2303,47 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (err) return err; - err = do_epoll_wait(epfd, events, maxevents, - ep_timeout_to_timespec(&to, timeout)); + err = do_epoll_wait(epfd, events, maxevents, timeout); restore_saved_sigmask_unless(err == -EINTR); return err; } + +COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, + struct epoll_event __user *, events, + int, maxevents, int, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 to; + + return do_compat_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, + struct epoll_event __user *, events, + int, maxevents, + const struct __kernel_timespec __user *, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_compat_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + #endif static int __init eventpoll_init(void) -- cgit v1.2.3 From b0a0c2615f6f199a656ed8549d7dce625d77aa77 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:41 -0800 Subject: epoll: wire up syscall epoll_pwait2 Split off from prev patch in the series that implements the syscall. 
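With the syscall wired up, userspace can invoke it directly via syscall(2) until a libc wrapper exists. A minimal hedged example follows: the sys_epoll_pwait2() helper mirrors the one used by the selftest later in this series, the fallback syscall number 441 matches the tables below, and the 10 ms timeout is an arbitrary illustrative value.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <linux/time_types.h>

#ifndef __NR_epoll_pwait2
#define __NR_epoll_pwait2 441
#endif

static int sys_epoll_pwait2(int fd, struct epoll_event *events, int maxevents,
			    const struct __kernel_timespec *timeout,
			    const sigset_t *sigset, size_t sigsetsize)
{
	return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout,
		       sigset, sigsetsize);
}

int main(void)
{
	struct epoll_event ev;
	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 };
	int epfd = epoll_create1(0);

	/*
	 * Nothing is registered on epfd, so this simply sleeps for ~10 ms and
	 * returns 0, exercising the nanosecond-resolution timeout path.
	 */
	int n = sys_epoll_pwait2(epfd, &ev, 1, &ts, NULL, 0);

	printf("epoll_pwait2 returned %d\n", n);
	close(epfd);
	return 0;
}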
Link: https://lkml.kernel.org/r/20201121144401.3727659-4-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/compat.h | 6 ++++++ include/linux/syscalls.h | 5 +++++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 2 ++ 22 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ee7b01bb7346..a6617067dbe6 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -480,3 +480,4 @@ 548 common pidfd_getfd sys_pidfd_getfd 549 common faccessat2 sys_faccessat2 550 common process_madvise sys_process_madvise +551 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index d056a548358e..20e1170e2e0a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -454,3 +454,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index b3b2019f8d16..86a9d7b3eabe 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 441 +#define __NR_compat_syscalls 442 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 107f08e03b9f..f4bca2b90218 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -889,6 +889,8 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SYSCALL(__NR_epoll_pwait2, sys_epoll_pwait2) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index b96ed8b8a508..bfc00f2bd437 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -361,3 +361,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 625fb6d32842..7fe4e45c864c 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl 
+++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index aae729c95cf9..a522adf194ab 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -446,3 +446,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 32817c954435..ad9c3dd0ab1f 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -379,3 +379,4 @@ 438 n32 pidfd_getfd sys_pidfd_getfd 439 n32 faccessat2 sys_faccessat2 440 n32 process_madvise sys_process_madvise +441 n32 epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9e4ea3c31b1c..91649690b52f 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -355,3 +355,4 @@ 438 n64 pidfd_getfd sys_pidfd_getfd 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise +441 n64 epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 29f5f28cf5ce..4bad0c40aed6 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -428,3 +428,4 @@ 438 o32 pidfd_getfd sys_pidfd_getfd 439 o32 faccessat2 sys_faccessat2 440 o32 process_madvise sys_process_madvise +441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index f375ea528e59..6bcc31966b44 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -438,3 +438,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 1275daec7fec..f744eb5cba88 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 28c168000483..14f6525886a8 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -443,3 +443,4 @@ 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 783738448ff5..9df40ac0ebc0 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -443,3 +443,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 
sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 78160260991b..c7da4c3271e6 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -486,3 +486,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0d0667a9fbd7..874aeacde2dd 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -445,3 +445,4 @@ 438 i386 pidfd_getfd sys_pidfd_getfd 439 i386 faccessat2 sys_faccessat2 440 i386 process_madvise sys_process_madvise +441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 379819244b91..78672124d28b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,7 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index b070f272995d..46116a28eeed 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -411,3 +411,4 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 diff --git a/include/linux/compat.h b/include/linux/compat.h index 400c0941c8af..6e65be753603 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_epoll_pwait2(int epfd, + struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index df0c3c74609e..f3929aff39cf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const sigset_t __user *sigmask, size_t sigsetsize); +asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long sys_dup(unsigned int fildes); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index fc48c64700eb..728752917785 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SC_COMP(__NR_epoll_pwait2, 
sys_epoll_pwait2, compat_sys_epoll_pwait2) #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 442 /* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index f27ac94d5fa7..19aa806890d5 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1); COND_SYSCALL(epoll_ctl); COND_SYSCALL(epoll_pwait); COND_SYSCALL_COMPAT(epoll_pwait); +COND_SYSCALL(epoll_pwait2); +COND_SYSCALL_COMPAT(epoll_pwait2); /* fs/fcntl.c */ -- cgit v1.2.3 From e9ce39b5b390e0e5944a46328cb0a18d132de532 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:44 -0800 Subject: selftests/filesystems: expand epoll with epoll_pwait2 Code coverage for the epoll_pwait2 syscall. epoll62: Repeat basic test epoll1, but exercising the new syscall. epoll63: Pass a timespec and exercise the timeout wakeup path. Link: https://lkml.kernel.org/r/20201121144401.3727659-5-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index 8f82f99f7748..ad7fabd575f9 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE +#include +#include #include #include #include @@ -21,6 +23,19 @@ struct epoll_mtcontext pthread_t waiter; }; +#ifndef __NR_epoll_pwait2 +#define __NR_epoll_pwait2 -1 +#endif + +static inline int sys_epoll_pwait2(int fd, struct epoll_event *events, + int maxevents, + const struct __kernel_timespec *timeout, + const sigset_t *sigset, size_t sigsetsize) +{ + return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout, + sigset, sigsetsize); +} + static void signal_handler(int signum) { } @@ -3377,4 +3392,61 @@ TEST(epoll61) close(ctx.evfd); } +/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */ +TEST(epoll62) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1); + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* Epoll_pwait2 basic timeout test. 
*/ +TEST(epoll63) +{ + const int cfg_delay_ms = 10; + unsigned long long tdiff; + struct __kernel_timespec ts; + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ts.tv_sec = 0; + ts.tv_nsec = cfg_delay_ms * 1000 * 1000; + + tdiff = msecs(); + EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0); + tdiff = msecs() - tdiff; + + EXPECT_GE(tdiff, cfg_delay_ms); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 01ab1ede9190b9fafa88b1c97129c034fa9aa437 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Dec 2020 14:05:32 -0800 Subject: mm/Kconfig: fix spelling mistake "whats" -> "what's" There is a spelling mistake in the Kconfig help text. Fix it. Link: https://lkml.kernel.org/r/20201217172717.58203-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 4275c25b5d8a..f730605b8dcf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -713,7 +713,7 @@ config ZSMALLOC_STAT select DEBUG_FS help This option enables code in the zsmalloc to collect various - statistics about whats happening in zsmalloc and exports that + statistics about what's happening in zsmalloc and exports that information to userspace via debugfs. If unsure, say N. -- cgit v1.2.3