Diffstat (limited to 'mm')

 -rw-r--r--  mm/filemap.c    |   9
 -rw-r--r--  mm/memcontrol.c | 364
 -rw-r--r--  mm/migrate.c    |  21
 -rw-r--r--  mm/pdflush.c    |   4
 -rw-r--r--  mm/rmap.c       |  14
 -rw-r--r--  mm/shmem.c      |  44
 -rw-r--r--  mm/vmscan.c     |   5

 7 files changed, 272 insertions, 189 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91f4f63..2d3ec1ffc66e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;

-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 		} else
-			mem_cgroup_uncharge_page(page);
+			mem_cgroup_uncharge_cache_page(page);
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_uncharge_cache_page(page);
 out:
 	return error;
 }
@@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
  * Otherwise return zero.
  *
  * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
  *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..fba566c51322 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
 #include <asm/uaccess.h>

-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES	5

 /*
  * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };

 /*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;

 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-	list_del_init(&pc->lru);
+	list_del(&pc->lru);
 }

 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;

+	if (mem_cgroup_subsys.disabled)
+		return;
+
 	/*
 	 * We cannot lock_page_cgroup while holding zone's lru_lock,
 	 * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;

-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	/*
-	 * Should page_cgroup's go to their own slab?
-	 * One could optimize the performance of the charging routine
-	 * by saving a bit in the page_flags and using it as a lock
-	 * to see if the cgroup page already has a page_cgroup associated
-	 * with it
-	 */
-retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and
-	 * the page has already been accounted.
-	 */
-	if (pc) {
-		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
-		unlock_page_cgroup(page);
-		goto done;
-	}
-	unlock_page_cgroup(page);
-
-	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
-	if (pc == NULL)
+	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+	if (unlikely(pc == NULL))
 		goto err;

 	/*
@@ -569,16 +546,18 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
-
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+	if (likely(!memcg)) {
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}

 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
 		}
 	}

-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
+	else
+		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;

 	lock_page_cgroup(page);
-	if (page_get_page_cgroup(page)) {
+	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
-		/*
-		 * Another charge has been added to this page already.
-		 * We take lock_page_cgroup(page) again and read
-		 * page->cgroup, increment refcnt.... just retry is OK.
-		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kmem_cache_free(page_cgroup_cache, pc);
-		goto retry;
+		goto done;
 	}
 	page_assign_page_cgroup(page, pc);
@@ -642,24 +620,65 @@ err:
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 			gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }

 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * Corner case handling. This is called from add_to_page_cache()
+	 * in usual. But some FS (shmem) precharges this page before calling it
+	 * and call add_to_page_cache() with GFP_NOWAIT.
+	 *
+	 * For GFP_NOWAIT case, the page may be pre-charged before calling
+	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+	 * charge twice. (It works but has to pay a bit larger cost.)
+	 */
+	if (!(gfp_mask & __GFP_WAIT)) {
+		struct page_cgroup *pc;
+
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			VM_BUG_ON(pc->page != page);
+			VM_BUG_ON(!pc->mem_cgroup);
+			unlock_page_cgroup(page);
+			return 0;
+		}
+		unlock_page_cgroup(page);
+	}
+
+	if (unlikely(!mm))
 		mm = &init_mm;
+
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }

 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
@@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page)
 	 */
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (!pc)
+	if (unlikely(!pc))
 		goto unlock;

 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);

-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;

-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);

-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);

-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }

+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;

 	if (mem_cgroup_subsys.disabled)
 		return 0;

 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);

-	return pc != NULL;
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+			ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }

-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed*/
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(page);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }

 /*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
  */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;

-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
-	}
+	if (mem_cgroup_subsys.disabled)
+		return 0;

-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	css_get(&mem->css);
+	rcu_read_unlock();

-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+	} while (!progress && --retry);

-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}

-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int progress;
+	int ret = 0;

-	unlock_page_cgroup(newpage);
+	while (res_counter_set_limit(&memcg->res, val)) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!retry_count) {
+			ret = -EBUSY;
+			break;
+		}
+		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+		if (!progress)
+			retry_count--;
+	}
+	return ret;
 }
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
@@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	int ret = -EBUSY;
 	int node, zid;

-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@ out:
 	return ret;
 }

-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	unsigned long long val;
+	int ret;
+
+	switch (cft->private) {
+	case RES_LIMIT:
+		/* This function does all necessary parse...reuse it */
+		ret = res_counter_memparse_write_strategy(buffer, &val);
+		if (!ret)
+			ret = mem_cgroup_resize_limit(memcg, val);
+		break;
+	default:
+		ret = -EINVAL; /* should be BUG() ? */
+		break;
+	}
+	return ret;
 }

 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
@@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	if (mem_cgroup_subsys.disabled)
-		return 0;
 	return cgroup_add_files(cont, ss, mem_cgroup_files,
 					ARRAY_SIZE(mem_cgroup_files));
 }
@@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct mem_cgroup *mem, *old_mem;

-	if (mem_cgroup_subsys.disabled)
-		return;
-
 	mm = get_task_mm(p);
 	if (mm == NULL)
 		return;
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f9..d8c65a65c61d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,9 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);

 	write_unlock_irq(&mapping->tree_lock);
+	if (!PageSwapCache(newpage)) {
+		mem_cgroup_uncharge_cache_page(page);
+	}

 	return 0;
 }
@@ -611,7 +614,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 		rc = fallback_migrate_page(mapping, newpage, page);

 	if (!rc) {
-		mem_cgroup_page_migration(page, newpage);
 		remove_migration_ptes(page, newpage);
 	} else
 		newpage->mapping = NULL;
@@ -641,6 +643,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;

+	charge = mem_cgroup_prepare_migration(page, newpage);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto move_newpage;
+	}
+	/* prepare cgroup just returns 0 or -ENOMEM */
+	BUG_ON(charge);
+
 	rc = -EAGAIN;
 	if (TestSetPageLocked(page)) {
 		if (!force)
@@ -692,19 +702,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto rcu_unlock;
 	}

-	charge = mem_cgroup_prepare_migration(page);
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);

 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);

-	if (rc) {
+	if (rc)
 		remove_migration_ptes(page, page);
-		if (charge)
-			mem_cgroup_end_migration(page);
-	} else if (charge)
-		mem_cgroup_end_migration(newpage);
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
@@ -725,6 +730,8 @@ unlock:
 	}

 move_newpage:
+	if (!charge)
+		mem_cgroup_end_migration(newpage);
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 9d834aa4b979..0cbe0c60c6bf 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work)
 		 * Thread creation: For how long have there been zero
 		 * available threads?
 		 */
-		if (jiffies - last_empty_jifs > 1 * HZ) {
+		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
 			/* unlocked list_empty() test is OK here */
 			if (list_empty(&pdflush_list)) {
 				/* unlocked test is OK here */
@@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work)
 		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
 			continue;
 		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
-		if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
 			/* Limit exit rate */
 			pdf->when_i_went_to_sleep = jiffies;
 			break;					/* exeunt */
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8e..abbd29f7c43f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (atomic_inc_and_test(&page->_mapcount))
 		__page_set_anon_rmap(page, vma, address);
-	else {
+	else
 		__page_check_anon_rmap(page, vma, address);
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
-	}
 }

 /**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-	else
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
 }

 #ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e1..f92fea94d037 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	/* Precharge page while we can wait, compensate afterwards */
+	/* Precharge page using GFP_KERNEL while we can wait */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
 	error = radix_tree_preload(GFP_KERNEL);
-	if (error)
-		goto uncharge;
+	if (error) {
+		mem_cgroup_uncharge_cache_page(page);
+		goto out;
+	}
 	error = 1;

 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val)
+	if (ptr && ptr->val == entry.val) {
 		error = add_to_page_cache(page, inode->i_mapping,
 						idx, GFP_NOWAIT);
+		/* does mem_cgroup_uncharge_cache_page on error */
+	} else	/* we must compensate for our precharge above */
+		mem_cgroup_uncharge_cache_page(page);
+
 	if (error == -EEXIST) {
 		struct page *filepage = find_get_page(inode->i_mapping, idx);
 		error = 1;
@@ -961,8 +967,6 @@ found:
 	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
 	radix_tree_preload_end();
-uncharge:
-	mem_cgroup_uncharge_page(page);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@@ -1311,17 +1315,14 @@ repeat:
 				shmem_swp_unmap(entry);
 				spin_unlock(&info->lock);
 				unlock_page(swappage);
+				page_cache_release(swappage);
 				if (error == -ENOMEM) {
 					/* allow reclaim from this memory cgroup */
-					error = mem_cgroup_cache_charge(swappage,
-						current->mm, gfp & ~__GFP_HIGHMEM);
-					if (error) {
-						page_cache_release(swappage);
+					error = mem_cgroup_shrink_usage(current->mm,
+									gfp);
+					if (error)
 						goto failed;
-					}
-					mem_cgroup_uncharge_page(swappage);
 				}
-				page_cache_release(swappage);
 				goto repeat;
 			}
 		} else if (sgp == SGP_READ && !filepage) {
@@ -1358,6 +1359,8 @@ repeat:
 		}

 		if (!filepage) {
+			int ret;
+
 			spin_unlock(&info->lock);
 			filepage = shmem_alloc_page(gfp, info, idx);
 			if (!filepage) {
@@ -1386,10 +1389,18 @@ repeat:
 				swap = *entry;
 				shmem_swp_unmap(entry);
 			}
-			if (error || swap.val || 0 != add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_NOWAIT)) {
+			ret = error || swap.val;
+			if (ret)
+				mem_cgroup_uncharge_cache_page(filepage);
+			else
+				ret = add_to_page_cache_lru(filepage, mapping,
+						idx, GFP_NOWAIT);
+			/*
+			 * At add_to_page_cache_lru() failure, uncharge will
+			 * be done automatically.
+			 */
+			if (ret) {
 				spin_unlock(&info->lock);
-				mem_cgroup_uncharge_page(filepage);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
@@ -1398,7 +1409,6 @@ repeat:
 					goto failed;
 				goto repeat;
 			}
-			mem_cgroup_uncharge_page(filepage);
 			info->flags |= SHMEM_PAGEIN;
 		}

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30ccd92b..26672c6cd3ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
+#include <linux/delayacct.h>

 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);

+	delayacct_freepages_start();
+
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
 	/*
@@ -1396,6 +1399,8 @@ out:
 	} else
 		mem_cgroup_record_reclaim_priority(sc->mem_cgroup,
 							priority);

+	delayacct_freepages_end();
+
 	return ret;
 }
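
Note on the mm/pdflush.c hunks above: they switch open-coded jiffies arithmetic over to the kernel's time_after() helper. Purely as an illustration of that idiom, here is a minimal userspace sketch (not kernel code): the macro body mirrors the jiffies helpers in include/linux/jiffies.h, while HZ, the timestamps and the variable names are made-up stand-ins rather than values taken from this patch. It shows how the signed-difference test stays correct after the counter wraps, where a plain comparison of the raw values would not.

/*
 * Userspace sketch of the time_after() idiom adopted in mm/pdflush.c.
 * Assumption: the macro body below mirrors the kernel's helper; the
 * sample values are invented purely for demonstration.
 */
#include <stdio.h>
#include <limits.h>

/* true if a is after b, even when the unsigned counter has wrapped */
#define time_after(a, b)  ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long hz = 1000;                      /* pretend HZ = 1000          */
	unsigned long slept = ULONG_MAX - 3 * hz;     /* stamped just before a wrap */
	unsigned long deadline = slept + 1 * hz;      /* still a huge value         */
	unsigned long now = 42;                       /* "jiffies" wrapped past 0   */

	/* Signed comparison of the difference: the deadline is seen as passed. */
	printf("time_after(now, deadline) = %d\n", time_after(now, deadline));

	/* A plain compare of the raw counters misses the wrap entirely. */
	printf("now > deadline            = %d\n", now > deadline);
	return 0;
}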