From 6d7225f0cc1a1fc32cf5dd01b4ab4b8a34c7cdb4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 May 2017 14:53:05 -0700 Subject: lockdep: teach lockdep about memalloc_noio_save Patch series "scope GFP_NOFS api", v5. This patch (of 7): Commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O during memory allocation") added the memalloc_noio_(save|restore) functions to enable people to modify the MM behavior by disabling I/O during memory allocation. This was further extended in commit 934f3072c17c ("mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set"). memalloc_noio_* functions prevent allocation paths recursing back into the filesystem without explicitly changing the flags for every allocation site. However, lockdep hasn't been keeping up with the changes and it entirely misses handling the memalloc_noio adjustments. Instead, it is left to the callers of __lockdep_trace_alloc to call the function after they have shaven the respective GFP flags which can lead to false positives: ================================= [ INFO: inconsistent lock state ] 4.10.0-nbor #134 Not tainted --------------------------------- inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage. 
fsstress/3365 [HC0[0]:SC0[0]:HE1:SE1] takes: (&xfs_nondir_ilock_class){++++?.}, at: xfs_ilock+0x141/0x230 {IN-RECLAIM_FS-W} state was registered at: __lock_acquire+0x62a/0x17c0 lock_acquire+0xc5/0x220 down_write_nested+0x4f/0x90 xfs_ilock+0x141/0x230 xfs_reclaim_inode+0x12a/0x320 xfs_reclaim_inodes_ag+0x2c8/0x4e0 xfs_reclaim_inodes_nr+0x33/0x40 xfs_fs_free_cached_objects+0x19/0x20 super_cache_scan+0x191/0x1a0 shrink_slab+0x26f/0x5f0 shrink_node+0xf9/0x2f0 kswapd+0x356/0x920 kthread+0x10c/0x140 ret_from_fork+0x31/0x40 irq event stamp: 173777 hardirqs last enabled at (173777): __local_bh_enable_ip+0x70/0xc0 hardirqs last disabled at (173775): __local_bh_enable_ip+0x37/0xc0 softirqs last enabled at (173776): _xfs_buf_find+0x67a/0xb70 softirqs last disabled at (173774): _xfs_buf_find+0x5db/0xb70 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&xfs_nondir_ilock_class); lock(&xfs_nondir_ilock_class); *** DEADLOCK *** 4 locks held by fsstress/3365: #0: (sb_writers#10){++++++}, at: mnt_want_write+0x24/0x50 #1: (&sb->s_type->i_mutex_key#12){++++++}, at: vfs_setxattr+0x6f/0xb0 #2: (sb_internal#2){++++++}, at: xfs_trans_alloc+0xfc/0x140 #3: (&xfs_nondir_ilock_class){++++?.}, at: xfs_ilock+0x141/0x230 stack backtrace: CPU: 0 PID: 3365 Comm: fsstress Not tainted 4.10.0-nbor #134 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: kmem_cache_alloc_node_trace+0x3a/0x2c0 vm_map_ram+0x2a1/0x510 _xfs_buf_map_pages+0x77/0x140 xfs_buf_get_map+0x185/0x2a0 xfs_attr_rmtval_set+0x233/0x430 xfs_attr_leaf_addname+0x2d2/0x500 xfs_attr_set+0x214/0x420 xfs_xattr_set+0x59/0xb0 __vfs_setxattr+0x76/0xa0 __vfs_setxattr_noperm+0x5e/0xf0 vfs_setxattr+0xae/0xb0 setxattr+0x15e/0x1a0 path_setxattr+0x8f/0xc0 SyS_lsetxattr+0x11/0x20 entry_SYSCALL_64_fastpath+0x23/0xc6 Let's fix this by making lockdep explicitly do the shaving of respective GFP flags. 
Fixes: 934f3072c17c ("mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set") Link: http://lkml.kernel.org/r/20170306131408.9828-2-mhocko@kernel.org Signed-off-by: Nikolay Borisov Signed-off-by: Michal Hocko Acked-by: Peter Zijlstra (Intel) Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Brian Foster Cc: Darrick J. Wong Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/locking/lockdep.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 98dd6231d43b..106f4dcf6679 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -2876,6 +2877,8 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) if (unlikely(!debug_locks)) return; + gfp_mask = memalloc_noio_flags(gfp_mask); + /* no reclaim without waiting on it */ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) return; @@ -3947,7 +3950,7 @@ EXPORT_SYMBOL_GPL(lock_unpin_lock); void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { - current->lockdep_reclaim_gfp = gfp_mask; + current->lockdep_reclaim_gfp = memalloc_noio_flags(gfp_mask); } void lockdep_clear_current_reclaim_state(void) -- cgit v1.2.3 From 7e7844226f1053236b6f6d5d122a06509fb14fd9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:09 -0700 Subject: lockdep: allow to disable reclaim lockup detection The current implementation of the reclaim lockup detection can lead to false positives and those even happen and usually lead to tweak the code to silence the lockdep by using GFP_NOFS even though the context can use __GFP_FS just fine. See http://lkml.kernel.org/r/20160512080321.GA18496@dastard as an example. 
================================= [ INFO: inconsistent lock state ] 4.5.0-rc2+ #4 Tainted: G O --------------------------------- inconsistent {RECLAIM_FS-ON-R} -> {IN-RECLAIM_FS-W} usage. kswapd0/543 [HC0[0]:SC0[0]:HE1:SE1] takes: (&xfs_nondir_ilock_class){++++-+}, at: xfs_ilock+0x177/0x200 [xfs] {RECLAIM_FS-ON-R} state was registered at: mark_held_locks+0x79/0xa0 lockdep_trace_alloc+0xb3/0x100 kmem_cache_alloc+0x33/0x230 kmem_zone_alloc+0x81/0x120 [xfs] xfs_refcountbt_init_cursor+0x3e/0xa0 [xfs] __xfs_refcount_find_shared+0x75/0x580 [xfs] xfs_refcount_find_shared+0x84/0xb0 [xfs] xfs_getbmap+0x608/0x8c0 [xfs] xfs_vn_fiemap+0xab/0xc0 [xfs] do_vfs_ioctl+0x498/0x670 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x12/0x6f CPU0 ---- lock(&xfs_nondir_ilock_class); lock(&xfs_nondir_ilock_class); *** DEADLOCK *** 3 locks held by kswapd0/543: stack backtrace: CPU: 0 PID: 543 Comm: kswapd0 Tainted: G O 4.5.0-rc2+ #4 Call Trace: lock_acquire+0xd8/0x1e0 down_write_nested+0x5e/0xc0 xfs_ilock+0x177/0x200 [xfs] xfs_reflink_cancel_cow_range+0x150/0x300 [xfs] xfs_fs_evict_inode+0xdc/0x1e0 [xfs] evict+0xc5/0x190 dispose_list+0x39/0x60 prune_icache_sb+0x4b/0x60 super_cache_scan+0x14f/0x1a0 shrink_slab.part.63.constprop.79+0x1e9/0x4e0 shrink_zone+0x15e/0x170 kswapd+0x4f1/0xa80 kthread+0xf2/0x110 ret_from_fork+0x3f/0x70 To quote Dave: "Ignoring whether reflink should be doing anything or not, that's a "xfs_refcountbt_init_cursor() gets called both outside and inside transactions" lockdep false positive case. The problem here is lockdep has seen this allocation from within a transaction, hence a GFP_NOFS allocation, and now it's seeing it in a GFP_KERNEL context. Also note that we have an active reference to this inode. 
So, because the reclaim annotations overload the interrupt level detections and it's seen the inode ilock been taken in reclaim ("interrupt") context, this triggers a reclaim context warning where it thinks it is unsafe to do this allocation in GFP_KERNEL context holding the inode ilock..." This sounds like a fundamental problem of the reclaim lock detection. It is really impossible to annotate such a special usecase IMHO unless the reclaim lockup detection is reworked completely. Until then it is much better to provide a way to add an "I know what I am doing" flag and mark problematic places. This would prevent abuse of the GFP_NOFS flag, which has a runtime effect even on configurations which have lockdep disabled. Introduce __GFP_NOLOCKDEP flag which tells the lockdep gfp tracking to skip the current allocation request. While we are at it also make sure that the radix tree doesn't accidentally override tags stored in the upper part of the gfp_mask. Link: http://lkml.kernel.org/r/20170306131408.9828-3-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Acked-by: Vlastimil Babka Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Brian Foster Cc: Darrick J.
Wong Cc: Nikolay Borisov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 10 +++++++++- kernel/locking/lockdep.c | 4 ++++ lib/radix-tree.c | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index db373b9d3223..978232a3b4ae 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,6 +40,11 @@ struct vm_area_struct; #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP 0x4000000u +#else +#define ___GFP_NOLOCKDEP 0 +#endif /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -179,8 +184,11 @@ struct vm_area_struct; #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT 25 +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 106f4dcf6679..f84294c9a018 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2897,6 +2897,10 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) return; + /* Disable lockdep if explicitly requested */ + if (gfp_mask & __GFP_NOLOCKDEP) + return; + mark_held_locks(curr, RECLAIM_FS); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 691a9ad48497..898e87998417 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2284,6 +2284,8 @@ static int radix_tree_cpu_dead(unsigned int cpu) void __init radix_tree_init(void) { int ret; + + BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); radix_tree_node_cachep = 
kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, -- cgit v1.2.3 From 7dea19f9ee636cb244109a4dba426bbb3e5304b7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:15 -0700 Subject: mm: introduce memalloc_nofs_{save,restore} API GFP_NOFS context is used for the following 5 reasons currently: - to prevent from deadlocks when the lock held by the allocation context would be needed during the memory reclaim - to prevent from stack overflows during the reclaim because the allocation is performed from a deep context already - to prevent lockups when the allocation context depends on other reclaimers to make a forward progress indirectly - just in case because this would be safe from the fs POV - silence lockdep false positives Unfortunately overuse of this allocation context brings some problems to the MM. Memory reclaim is much weaker (especially during heavy FS metadata workloads), OOM killer cannot be invoked because the MM layer doesn't have enough information about how much memory is freeable by the FS layer. In many cases it is far from clear why the weaker context is even used and so it might be used unnecessarily. We would like to get rid of those as much as possible. One way to do that is to use the flag in scopes rather than isolated cases. Such a scope is declared when really necessary, tracked per task and all the allocation requests from within the context will simply inherit the GFP_NOFS semantic. Not only this is easier to understand and maintain because there are much less problematic contexts than specific allocation requests, this also helps code paths where FS layer interacts with other layers (e.g. crypto, security modules, MM etc...) and there is no easy way to convey the allocation context between the layers. Introduce memalloc_nofs_{save,restore} API to control the scope of GFP_NOFS allocation context. 
This is basically copying the memalloc_noio_{save,restore} API we have for the other restricted allocation context, GFP_NOIO. The PF_MEMALLOC_NOFS flag already exists and it is just an alias for PF_FSTRANS which has been xfs specific until recently. There are no PF_FSTRANS users anymore so let's just drop it. PF_MEMALLOC_NOFS is now checked in the MM layer and drops __GFP_FS implicitly, the same way as PF_MEMALLOC_NOIO drops __GFP_IO. memalloc_noio_flags is renamed to current_gfp_context because it now cares about both PF_MEMALLOC_NOFS and PF_MEMALLOC_NOIO contexts. Xfs code paths preserve their semantics. kmem_flags_convert() doesn't need to evaluate the flag anymore. This patch shouldn't introduce any functional changes. Let's hope that filesystems will drop direct GFP_NOFS (resp. ~__GFP_FS) usage as much as possible and only use properly documented memalloc_nofs_{save,restore} checkpoints where they are appropriate. [akpm@linux-foundation.org: fix comment typo, reflow comment] Link: http://lkml.kernel.org/r/20170306131408.9828-5-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Brian Foster Cc: Darrick J.
Wong Cc: Nikolay Borisov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xfs/kmem.h | 2 +- include/linux/gfp.h | 8 ++++++++ include/linux/sched.h | 8 +++----- include/linux/sched/mm.h | 26 +++++++++++++++++++++++--- kernel/locking/lockdep.c | 6 +++--- mm/page_alloc.c | 10 ++++++---- mm/vmscan.c | 6 +++--- 7 files changed, 47 insertions(+), 19 deletions(-) (limited to 'kernel/locking') diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index a6c8da40c70d..d6ea520162b2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -50,7 +50,7 @@ kmem_flags_convert(xfs_km_flags_t flags) lflags = GFP_ATOMIC | __GFP_NOWARN; } else { lflags = GFP_KERNEL | __GFP_NOWARN; - if ((current->flags & PF_MEMALLOC_NOFS) || (flags & KM_NOFS)) + if (flags & KM_NOFS) lflags &= ~__GFP_FS; } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 978232a3b4ae..2bfcfd33e476 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,8 +210,16 @@ struct vm_area_struct; * * GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. * * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. * * GFP_USER is for userspace allocations that also need to be directly * accessibly by the kernel or hardware. 
It is typically used by hardware diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ac11465ac5b..993e7e25a3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1224,9 +1224,9 @@ extern struct pid *cad_pid; #define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_KSWAPD 0x00020000 /* I am kswapd */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ @@ -1237,8 +1237,6 @@ extern struct pid *cad_pid; #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -#define PF_MEMALLOC_NOFS PF_FSTRANS /* Transition to a more generic GFP_NOFS scope semantic */ - /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 830953ebb391..9daabe138c99 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -149,13 +149,21 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. +/* + * Applies per-task gfp context to the given allocation flags. 
+ * PF_MEMALLOC_NOIO implies GFP_NOIO + * PF_MEMALLOC_NOFS implies GFP_NOFS */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) +static inline gfp_t current_gfp_context(gfp_t flags) { + /* + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precendence + */ if (unlikely(current->flags & PF_MEMALLOC_NOIO)) flags &= ~(__GFP_IO | __GFP_FS); + else if (unlikely(current->flags & PF_MEMALLOC_NOFS)) + flags &= ~__GFP_FS; return flags; } @@ -171,4 +179,16 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +static inline unsigned int memalloc_nofs_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOFS; + current->flags |= PF_MEMALLOC_NOFS; + return flags; +} + +static inline void memalloc_nofs_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f84294c9a018..fd440b5a3c75 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2877,7 +2877,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) if (unlikely(!debug_locks)) return; - gfp_mask = memalloc_noio_flags(gfp_mask); + gfp_mask = current_gfp_context(gfp_mask); /* no reclaim without waiting on it */ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) @@ -2888,7 +2888,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) return; /* We're only interested __GFP_FS allocations for now */ - if (!(gfp_mask & __GFP_FS)) + if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS)) return; /* @@ -3954,7 +3954,7 @@ EXPORT_SYMBOL_GPL(lock_unpin_lock); void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { - current->lockdep_reclaim_gfp = memalloc_noio_flags(gfp_mask); + current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask); } void lockdep_clear_current_reclaim_state(void) diff --git 
a/mm/page_alloc.c b/mm/page_alloc.c index 34ac32428de8..7a3751e53f91 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3951,10 +3951,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, goto out; /* - * Runtime PM, block IO and its error handling path can deadlock - * because I/O on the device might not complete. + * Apply scoped allocation constraints. This is mainly about GFP_NOFS + * resp. GFP_NOIO which has to be inherited for all allocation requests + * from a particular context which has been marked by + * memalloc_no{fs,io}_{save,restore}. */ - alloc_mask = memalloc_noio_flags(gfp_mask); + alloc_mask = current_gfp_context(gfp_mask); ac.spread_dirty_pages = false; /* @@ -7408,7 +7410,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .zone = page_zone(pfn_to_page(start)), .mode = MIGRATE_SYNC, .ignore_skip_hint = true, - .gfp_mask = memalloc_noio_flags(gfp_mask), + .gfp_mask = current_gfp_context(gfp_mask), }; INIT_LIST_HEAD(&cc.migratepages); diff --git a/mm/vmscan.c b/mm/vmscan.c index ec4555369e17..3ad66580b8b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2915,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, unsigned long nr_reclaimed; struct scan_control sc = { .nr_to_reclaim = SWAP_CLUSTER_MAX, - .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), + .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)), .reclaim_idx = gfp_zone(gfp_mask), .order = order, .nodemask = nodemask, @@ -2995,7 +2995,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, int nid; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), - .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + .gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), .reclaim_idx = MAX_NR_ZONES - 1, .target_mem_cgroup = memcg, @@ -3702,7 +3702,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in int classzone_idx 
= gfp_zone(gfp_mask); struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), - .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), + .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)), .order = order, .priority = NODE_RECLAIM_PRIORITY, .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE), -- cgit v1.2.3