Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig              2
-rw-r--r--  mm/backing-dev.c        7
-rw-r--r--  mm/damon/core.c        20
-rw-r--r--  mm/damon/dbgfs.c       15
-rw-r--r--  mm/damon/vaddr-test.h  79
-rw-r--r--  mm/damon/vaddr.c        2
-rw-r--r--  mm/filemap.c            2
-rw-r--r--  mm/hugetlb.c            2
-rw-r--r--  mm/kfence/core.c        1
-rw-r--r--  mm/memcontrol.c       106
-rw-r--r--  mm/memory-failure.c    14
-rw-r--r--  mm/memory_hotplug.c     1
-rw-r--r--  mm/mempolicy.c          3
-rw-r--r--  mm/slub.c              15
-rw-r--r--  mm/swap_slots.c         1
-rw-r--r--  mm/vmscan.c            65
16 files changed, 191 insertions, 144 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 28edafc820ad..356f4f2c779e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -428,7 +428,7 @@ config THP_SWAP
# UP and nommu archs use km based percpu allocator
#
config NEED_PER_CPU_KM
- depends on !SMP
+ depends on !SMP || !MMU
bool
default y
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1eead4761011..eae96dfe0261 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -945,6 +945,13 @@ void bdi_unregister(struct backing_dev_info *bdi)
wb_shutdown(&bdi->wb);
cgwb_bdi_unregister(bdi);
+ /*
+ * If this BDI's min ratio has been set, use bdi_set_min_ratio() to
+ * update the global bdi_min_ratio.
+ */
+ if (bdi->min_ratio)
+ bdi_set_min_ratio(bdi, 0);
+
if (bdi->dev) {
bdi_debug_unregister(bdi);
device_unregister(bdi->dev);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index c381b3c525d0..e92497895202 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -282,7 +282,6 @@ int damon_set_targets(struct damon_ctx *ctx,
for (i = 0; i < nr_ids; i++) {
t = damon_new_target(ids[i]);
if (!t) {
- pr_err("Failed to alloc damon_target\n");
/* The caller should do cleanup of the ids itself */
damon_for_each_target_safe(t, next, ctx)
damon_destroy_target(t);
@@ -312,16 +311,10 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long primitive_upd_int,
unsigned long min_nr_reg, unsigned long max_nr_reg)
{
- if (min_nr_reg < 3) {
- pr_err("min_nr_regions (%lu) must be at least 3\n",
- min_nr_reg);
+ if (min_nr_reg < 3)
return -EINVAL;
- }
- if (min_nr_reg > max_nr_reg) {
- pr_err("invalid nr_regions. min (%lu) > max (%lu)\n",
- min_nr_reg, max_nr_reg);
+ if (min_nr_reg > max_nr_reg)
return -EINVAL;
- }
ctx->sample_interval = sample_int;
ctx->aggr_interval = aggr_int;
@@ -980,10 +973,11 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme)
static void kdamond_usleep(unsigned long usecs)
{
- if (usecs > 100 * 1000)
- schedule_timeout_interruptible(usecs_to_jiffies(usecs));
+ /* See Documentation/timers/timers-howto.rst for the thresholds */
+ if (usecs > 20 * USEC_PER_MSEC)
+ schedule_timeout_idle(usecs_to_jiffies(usecs));
else
- usleep_range(usecs, usecs + 1);
+ usleep_idle_range(usecs, usecs + 1);
}
/* Returns negative error code if it's not activated but should return */
@@ -1038,7 +1032,7 @@ static int kdamond_fn(void *data)
ctx->callback.after_sampling(ctx))
done = true;
- usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+ kdamond_usleep(ctx->sample_interval);
if (ctx->primitive.check_accesses)
max_nr_accesses = ctx->primitive.check_accesses(ctx);
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 9b520bb4a3e7..ad65436756af 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -210,10 +210,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
&wmarks.low, &parsed);
if (ret != 18)
break;
- if (!damos_action_valid(action)) {
- pr_err("wrong action %d\n", action);
+ if (!damos_action_valid(action))
goto fail;
- }
pos += parsed;
scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
@@ -355,6 +353,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct damon_ctx *ctx = file->private_data;
+ struct damon_target *t, *next_t;
bool id_is_pid = true;
char *kbuf, *nrs;
unsigned long *targets;
@@ -399,8 +398,12 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
goto unlock_out;
}
- /* remove targets with previously-set primitive */
- damon_set_targets(ctx, NULL, 0);
+ /* remove previously set targets */
+ damon_for_each_target_safe(t, next_t, ctx) {
+ if (targetid_is_pid(ctx))
+ put_pid((struct pid *)t->id);
+ damon_destroy_target(t);
+ }
/* Configure the context for the address space type */
if (id_is_pid)
@@ -652,10 +655,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx)
if (!targetid_is_pid(ctx))
return;
+ mutex_lock(&ctx->kdamond_lock);
damon_for_each_target_safe(t, next, ctx) {
put_pid((struct pid *)t->id);
damon_destroy_target(t);
}
+ mutex_unlock(&ctx->kdamond_lock);
}
static struct damon_ctx *dbgfs_new_ctx(void)
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
index ecfd0b2ed222..6a1b9272ea12 100644
--- a/mm/damon/vaddr-test.h
+++ b/mm/damon/vaddr-test.h
@@ -135,7 +135,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
struct damon_addr_range *three_regions,
unsigned long *expected, int nr_expected)
{
- struct damon_ctx *ctx = damon_new_ctx();
struct damon_target *t;
struct damon_region *r;
int i;
@@ -145,7 +144,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
damon_add_region(r, t);
}
- damon_add_target(ctx, t);
damon_va_apply_three_regions(t, three_regions);
@@ -154,8 +152,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
}
-
- damon_destroy_ctx(ctx);
}
/*
@@ -252,60 +248,59 @@ static void damon_test_apply_three_regions4(struct kunit *test)
new_three_regions, expected, ARRAY_SIZE(expected));
}
-static void damon_test_split_evenly(struct kunit *test)
+static void damon_test_split_evenly_fail(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
{
- struct damon_ctx *c = damon_new_ctx();
- struct damon_target *t;
- struct damon_region *r;
- unsigned long i;
-
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
- -EINVAL);
-
- t = damon_new_target(42);
- r = damon_new_region(0, 100);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL);
+ struct damon_target *t = damon_new_target(42);
+ struct damon_region *r = damon_new_region(start, end);
damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
- i = 0;
damon_for_each_region(r, t) {
- KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10);
- KUNIT_EXPECT_EQ(test, r->ar.end, i * 10);
+ KUNIT_EXPECT_EQ(test, r->ar.start, start);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
}
+
damon_free_target(t);
+}
+
+static void damon_test_split_evenly_succ(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target(42);
+ struct damon_region *r = damon_new_region(start, end);
+ unsigned long expected_width = (end - start) / nr_pieces;
+ unsigned long i = 0;
- t = damon_new_target(42);
- r = damon_new_region(5, 59);
damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), 0);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces);
- i = 0;
damon_for_each_region(r, t) {
- if (i == 4)
+ if (i == nr_pieces - 1)
break;
- KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++);
- KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i);
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i++ * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width);
}
- KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i);
- KUNIT_EXPECT_EQ(test, r->ar.end, 59ul);
+ KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
damon_free_target(t);
+}
- t = damon_new_target(42);
- r = damon_new_region(5, 6);
- damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
+static void damon_test_split_evenly(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
+ -EINVAL);
- damon_for_each_region(r, t) {
- KUNIT_EXPECT_EQ(test, r->ar.start, 5ul);
- KUNIT_EXPECT_EQ(test, r->ar.end, 6ul);
- }
- damon_free_target(t);
- damon_destroy_ctx(c);
+ damon_test_split_evenly_fail(test, 0, 100, 0);
+ damon_test_split_evenly_succ(test, 0, 100, 10);
+ damon_test_split_evenly_succ(test, 5, 59, 5);
+ damon_test_split_evenly_fail(test, 5, 6, 2);
}
static struct kunit_case damon_test_cases[] = {
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 35fe49080ee9..20a9a9d69eb1 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -13,6 +13,7 @@
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
#include "prmtv-common.h"
@@ -626,7 +627,6 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
case DAMOS_STAT:
return 0;
default:
- pr_warn("Wrong action %d\n", scheme->action);
return -EINVAL;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index daa0e23a6ee6..39c4c46c6133 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3253,8 +3253,6 @@ static struct page *next_uptodate_page(struct page *page,
goto skip;
if (!PageUptodate(page) || PageReadahead(page))
goto skip;
- if (PageHWPoison(page))
- goto skip;
if (!trylock_page(page))
goto skip;
if (page->mapping != mapping)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index abcd1785c629..a1baa198519a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2973,7 +2973,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
struct huge_bootmem_page *m = NULL; /* initialize for clang */
int nr_nodes, node;
- if (nid >= nr_online_nodes)
+ if (nid != NUMA_NO_NODE && nid >= nr_online_nodes)
return 0;
/* do node specific alloc */
if (nid != NUMA_NO_NODE) {
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 09945784df9e..a19154a8d196 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -683,6 +683,7 @@ static const struct file_operations objects_fops = {
.open = open_objects,
.read = seq_read,
.llseek = seq_lseek,
+ .release = seq_release,
};
static int __init kfence_debugfs_init(void)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6863a834ed42..2ed5f2a0879d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -776,24 +776,6 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
rcu_read_unlock();
}
-/*
- * mod_objcg_mlstate() may be called with irq enabled, so
- * mod_memcg_lruvec_state() should be used.
- */
-static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
- struct pglist_data *pgdat,
- enum node_stat_item idx, int nr)
-{
- struct mem_cgroup *memcg;
- struct lruvec *lruvec;
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_memcg_lruvec_state(lruvec, idx, nr);
- rcu_read_unlock();
-}
-
/**
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
@@ -2137,41 +2119,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
}
#endif
-/*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
- struct memcg_stock_pcp *stock;
-
- if (likely(in_task())) {
- *pflags = 0UL;
- preempt_disable();
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->task_obj;
- }
-
- local_irq_save(*pflags);
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
- if (likely(in_task()))
- preempt_enable();
- else
- local_irq_restore(flags);
-}
-
/**
* consume_stock: Try to consume stocked charge on this cpu.
* @memcg: memcg to consume from.
@@ -2816,6 +2763,59 @@ retry:
*/
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+/*
+ * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+ * sequence used in this case to access content from object stock is slow.
+ * To optimize for user context access, there are now two object stocks for
+ * task context and interrupt context access respectively.
+ *
+ * The task context object stock can be accessed by disabling preemption only
+ * which is cheap in non-preempt kernel. The interrupt context object stock
+ * can only be accessed after disabling interrupt. User context code can
+ * access interrupt object stock, but not vice versa.
+ */
+static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
+{
+ struct memcg_stock_pcp *stock;
+
+ if (likely(in_task())) {
+ *pflags = 0UL;
+ preempt_disable();
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->task_obj;
+ }
+
+ local_irq_save(*pflags);
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->irq_obj;
+}
+
+static inline void put_obj_stock(unsigned long flags)
+{
+ if (likely(in_task()))
+ preempt_enable();
+ else
+ local_irq_restore(flags);
+}
+
+/*
+ * mod_objcg_mlstate() may be called with irq enabled, so
+ * mod_memcg_lruvec_state() should be used.
+ */
+static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
+ struct pglist_data *pgdat,
+ enum node_stat_item idx, int nr)
+{
+ struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ mod_memcg_lruvec_state(lruvec, idx, nr);
+ rcu_read_unlock();
+}
+
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
{
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 07c875fdeaf0..3a274468f193 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1470,17 +1470,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (!(flags & MF_COUNT_INCREASED)) {
res = get_hwpoison_page(p, flags);
if (!res) {
- /*
- * Check "filter hit" and "race with other subpage."
- */
lock_page(head);
- if (PageHWPoison(head)) {
- if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
- || (p != head && TestSetPageHWPoison(head))) {
+ if (hwpoison_filter(p)) {
+ if (TestClearPageHWPoison(head))
num_poisoned_pages_dec();
- unlock_page(head);
- return 0;
- }
+ unlock_page(head);
+ return 0;
}
unlock_page(head);
res = MF_FAILED;
@@ -2239,6 +2234,7 @@ retry:
} else if (ret == 0) {
if (soft_offline_free_page(page) && try_again) {
try_again = false;
+ flags &= ~MF_COUNT_INCREASED;
goto retry;
}
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 852041f6be41..2a9627dc784c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -35,6 +35,7 @@
#include <linux/memblock.h>
#include <linux/compaction.h>
#include <linux/rmap.h>
+#include <linux/module.h>
#include <asm/tlbflush.h>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 10e9c87260ed..f6248affaf38 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2140,8 +2140,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* memory with both reclaim and compact as well.
*/
if (!page && (gfp & __GFP_DIRECT_RECLAIM))
- page = __alloc_pages_node(hpage_node,
- gfp, order);
+ page = __alloc_pages(gfp, order, hpage_node, nmask);
goto out;
}
diff --git a/mm/slub.c b/mm/slub.c
index a8626825a829..abe7db581d68 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5081,6 +5081,7 @@ struct loc_track {
unsigned long max;
unsigned long count;
struct location *loc;
+ loff_t idx;
};
static struct dentry *slab_debugfs_root;
@@ -6052,11 +6053,11 @@ __initcall(slab_sysfs_init);
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
static int slab_debugfs_show(struct seq_file *seq, void *v)
{
-
- struct location *l;
- unsigned int idx = *(unsigned int *)v;
struct loc_track *t = seq->private;
+ struct location *l;
+ unsigned long idx;
+ idx = (unsigned long) t->idx;
if (idx < t->count) {
l = &t->loc[idx];
@@ -6105,16 +6106,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
{
struct loc_track *t = seq->private;
- v = ppos;
- ++*ppos;
+ t->idx = ++(*ppos);
if (*ppos <= t->count)
- return v;
+ return ppos;
return NULL;
}
static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
{
+ struct loc_track *t = seq->private;
+
+ t->idx = *ppos;
return ppos;
}
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 16f706c55d92..2b5531840583 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -30,6 +30,7 @@
#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fb9584641ac7..700434db5735 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1021,6 +1021,39 @@ static void handle_write_error(struct address_space *mapping,
unlock_page(page);
}
+static bool skip_throttle_noprogress(pg_data_t *pgdat)
+{
+ int reclaimable = 0, write_pending = 0;
+ int i;
+
+ /*
+ * If kswapd is disabled, reschedule if necessary but do not
+ * throttle as the system is likely near OOM.
+ */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
+ /*
+ * If there are a lot of dirty/writeback pages then do not
+ * throttle as throttling will occur when the pages cycle
+ * towards the end of the LRU if still under writeback.
+ */
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+
+ if (!populated_zone(zone))
+ continue;
+
+ reclaimable += zone_reclaimable_pages(zone);
+ write_pending += zone_page_state_snapshot(zone,
+ NR_ZONE_WRITE_PENDING);
+ }
+ if (2 * write_pending <= reclaimable)
+ return true;
+
+ return false;
+}
+
void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
{
wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
@@ -1056,8 +1089,16 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
}
break;
+ case VMSCAN_THROTTLE_CONGESTED:
+ fallthrough;
case VMSCAN_THROTTLE_NOPROGRESS:
- timeout = HZ/2;
+ if (skip_throttle_noprogress(pgdat)) {
+ cond_resched();
+ return;
+ }
+
+ timeout = 1;
+
break;
case VMSCAN_THROTTLE_ISOLATED:
timeout = HZ/50;
@@ -3321,7 +3362,7 @@ again:
if (!current_is_kswapd() && current_may_throttle() &&
!sc->hibernation_mode &&
test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
- reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+ reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);
if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc))
@@ -3386,16 +3427,16 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
}
/*
- * Do not throttle kswapd on NOPROGRESS as it will throttle on
- * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under
- * writeback and marked for immediate reclaim at the tail of
- * the LRU.
+ * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will
+ * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages
+ * under writeback and marked for immediate reclaim at the tail of the
+ * LRU.
*/
- if (current_is_kswapd())
+ if (current_is_kswapd() || cgroup_reclaim(sc))
return;
/* Throttle if making no progress at high priorities. */
- if (sc->priority < DEF_PRIORITY - 2)
+ if (sc->priority == 1 && !sc->nr_reclaimed)
reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
}
@@ -3415,6 +3456,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_scanned;
gfp_t orig_mask;
pg_data_t *last_pgdat = NULL;
+ pg_data_t *first_pgdat = NULL;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -3478,14 +3520,19 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
/* need some check for avoid more shrink_zone() */
}
+ if (!first_pgdat)
+ first_pgdat = zone->zone_pgdat;
+
/* See comment about same check for global reclaim above */
if (zone->zone_pgdat == last_pgdat)
continue;
last_pgdat = zone->zone_pgdat;
shrink_node(zone->zone_pgdat, sc);
- consider_reclaim_throttle(zone->zone_pgdat, sc);
}
+ if (first_pgdat)
+ consider_reclaim_throttle(first_pgdat, sc);
+
/*
* Restore to original mask to avoid the impact on the caller if we
* promoted it to __GFP_HIGHMEM.