From 27ee57c93ff00b8a2d6c6dd6b0b3dddda7b43b77 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:17:35 -0700 Subject: mm: memcontrol: report slab usage in cgroup2 memory.stat Show how much memory is used for storing reclaimable and unreclaimable in-kernel data structures allocated from slab caches. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'Documentation') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index ff49cf901148..e4e0c1d78cee 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -843,6 +843,11 @@ PAGE_SIZE multiple when read back. Amount of memory used to cache filesystem data, including tmpfs and shared memory. + slab + + Amount of memory used for storing in-kernel data + structures. + sock Amount of memory used in network transmission buffers @@ -871,6 +876,16 @@ PAGE_SIZE multiple when read back. on the internal memory management lists used by the page reclaim algorithm + slab_reclaimable + + Part of "slab" that might be reclaimed, such as + dentries and inodes. + + slab_unreclaimable + + Part of "slab" that cannot be reclaimed on memory + pressure. + pgfault Total number of page faults incurred -- cgit v1.2.3 From 12580e4b54ba8a1b22ec977c200be0174ca42348 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:17:38 -0700 Subject: mm: memcontrol: report kernel stack usage in cgroup2 memory.stat Show how much memory is allocated to kernel stacks. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 4 ++++ include/linux/memcontrol.h | 3 ++- kernel/fork.c | 10 +++++++++- mm/memcontrol.c | 2 ++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index e4e0c1d78cee..e2f4e7948a66 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back. Amount of memory used to cache filesystem data, including tmpfs and shared memory. + kernel_stack + + Amount of memory allocated to kernel stacks. + slab Amount of memory used for storing in-kernel data diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e7af4834ffea..d6300313b298 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,9 +52,10 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, /* default hierarchy stats */ - MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, + MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS, MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, + MEMCG_SOCK, MEMCG_NR_STAT, }; diff --git a/kernel/fork.c b/kernel/fork.c index 2e391c754ae7..accb7221d547 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -164,12 +164,20 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (page) + memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, + 1 << THREAD_SIZE_ORDER); + return page ? 
page_address(page) : NULL; } static inline void free_thread_info(struct thread_info *ti) { - free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); + struct page *page = virt_to_page(ti); + + memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, + -(1 << THREAD_SIZE_ORDER)); + __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else static struct kmem_cache *thread_info_cache; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3ad64bf464fd..4b7dda7c2e74 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5106,6 +5106,8 @@ static int memory_stat_show(struct seq_file *m, void *v) (u64)stat[MEM_CGROUP_STAT_RSS] * PAGE_SIZE); seq_printf(m, "file %llu\n", (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); + seq_printf(m, "kernel_stack %llu\n", + (u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE); seq_printf(m, "slab %llu\n", (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); -- cgit v1.2.3 From f9719a03de51e13526d614e79d002f838770b2d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:18:45 -0700 Subject: thp, vmstats: count deferred split events Count how many times we put a THP on the split queue. Currently, it happens on partial unmap of a THP. A rapidly growing value can indicate that an application behaves unfriendly with respect to THP: it often faults in a huge page and then unmaps part of it. This leads to unnecessary memory fragmentation and the application may require tuning. The event can also help with debugging kernel [mis-]behaviour. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/transhuge.txt | 5 +++++ include/linux/vm_event_item.h | 1 + mm/huge_memory.c | 1 + mm/vmstat.c | 1 + 4 files changed, 8 insertions(+) (limited to 'Documentation') diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 21cf34f3ddb2..0dc8632aa01e 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -229,6 +229,11 @@ thp_split_page is incremented every time a huge page is split into base thp_split_page_failed is incremented if the kernel fails to split a huge page. This can happen if the page was pinned by somebody. +thp_deferred_split_page is incremented when a huge page is put onto the split + queue. This happens when a huge page is partially unmapped and + splitting it would free up some memory. Pages on the split queue are + going to be split under memory pressure. + thp_split_pmd is incremented every time a PMD is split into a table of PTEs. This can happen, for instance, when an application calls mprotect() or munmap() on part of a huge page. 
It doesn't split the huge page, only the page table entry. diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 58ecc056ee45..ec084321fe09 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -72,6 +72,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_COLLAPSE_ALLOC_FAILED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, + THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1ea21e203a70..1dddfb21fc22 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3455,6 +3455,7 @@ void deferred_split_huge_page(struct page *page) spin_lock_irqsave(&pgdata->split_queue_lock, flags); if (list_empty(page_deferred_list(page))) { + count_vm_event(THP_DEFERRED_SPLIT_PAGE); list_add_tail(page_deferred_list(page), &pgdata->split_queue); pgdata->split_queue_len++; } diff --git a/mm/vmstat.c b/mm/vmstat.c index f80066248c94..5e4300482897 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -848,6 +848,7 @@ const char * const vmstat_text[] = { "thp_collapse_alloc_failed", "thp_split_page", "thp_split_page_failed", + "thp_deferred_split_page", "thp_split_pmd", "thp_zero_page_alloc", "thp_zero_page_alloc_failed", -- cgit v1.2.3 From 795ae7a0de6b834a0cc202aa55c190ef81496665 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 17 Mar 2016 14:19:14 -0700 Subject: mm: scale kswapd watermarks in proportion to memory In machines with 140G of memory and enterprise flash storage, we have seen read and write bursts routinely exceed the kswapd watermarks and cause thundering herds in direct reclaim. Unfortunately, the only way to tune kswapd aggressiveness is through adjusting min_free_kbytes - the system's emergency reserves - which is entirely unrelated to the system's latency requirements. In order to get kswapd to maintain a 250M buffer of free memory, the emergency reserves need to be set to 1G. That is a lot of memory wasted for no good reason. On the other hand, it's reasonable to assume that allocation bursts and overall allocation concurrency scale with memory capacity, so it makes sense to make kswapd aggressiveness a function of that as well. Change the kswapd watermark scale factor from the currently fixed 25% of the tunable emergency reserve to a tunable 0.1% of memory. Beyond 1G of memory, this will produce bigger watermark steps than the current formula with default settings. Ensure that the new formula never chooses steps smaller than that, i.e. 25% of the emergency reserve. On a 140G machine, this raises the default watermark steps - the distance between min and low, and low and high - from 16M to 143M. Signed-off-by: Johannes Weiner Acked-by: Mel Gorman Acked-by: Rik van Riel Acked-by: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 18 ++++++++++++++++++ include/linux/mm.h | 1 + include/linux/mmzone.h | 2 ++ kernel/sysctl.c | 10 ++++++++++ mm/page_alloc.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 89a887c76629..cb0368459da3 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -803,6 +803,24 @@ performance impact. Reclaim code needs to take various locks to find freeable directory and inode objects. With vfs_cache_pressure=1000, it will look for ten times more freeable objects than there are. 
+============================================================= + +watermark_scale_factor: + +This factor controls the aggressiveness of kswapd. It defines the +amount of memory left in a node/system before kswapd is woken up and +how much memory needs to be free before kswapd goes back to sleep. + +The unit is in fractions of 10,000. The default value of 10 means the +distances between watermarks are 0.1% of the available memory in the +node/system. The maximum value is 1000, or 10% of memory. + +A high rate of threads entering direct reclaim (allocstall) or kswapd +going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate +that the number of free pages kswapd maintains for latency reasons is +too small for the allocation bursts occurring in the system. This knob +can then be used to tune kswapd aggressiveness accordingly. + ============================================================== zone_reclaim_mode: diff --git a/include/linux/mm.h b/include/linux/mm.h index 75d1907b9009..f7fd64227d3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1889,6 +1889,7 @@ extern void zone_pcp_reset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; +extern int watermark_scale_factor; /* nommu.c */ extern atomic_long_t mmap_pages_allocated; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bdd9a270a813..c60df9257cc7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -841,6 +841,8 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f5102fabef7f..725587f10667 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,6 +126,7 @@ static int __maybe_unused two = 2; static int __maybe_unused four = 4; static unsigned long one_ul = 1; static int one_hundred = 100; +static int one_thousand = 1000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -1403,6 +1404,15 @@ static struct ctl_table vm_table[] = { .proc_handler = min_free_kbytes_sysctl_handler, .extra1 = &zero, }, + { + .procname = "watermark_scale_factor", + .data = &watermark_scale_factor, + .maxlen = sizeof(watermark_scale_factor), + .mode = 0644, + .proc_handler = watermark_scale_factor_sysctl_handler, + .extra1 = &one, + .extra2 = &one_thousand, + }, { .procname = "percpu_pagelist_fraction", .data = &percpu_pagelist_fraction, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 941b802e11ec..d156310aedeb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -249,6 +249,7 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +int watermark_scale_factor = 10; static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; @@ -6347,8 +6348,17 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_MIN] = tmp; } - zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); - zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + /* + * Set the kswapd watermarks distance according to the + * scale factor in proportion to available memory, but + * ensure a minimum size on small systems. 
+ */ + tmp = max_t(u64, tmp >> 2, + mult_frac(zone->managed_pages, + watermark_scale_factor, 10000)); + + zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; + zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; __mod_zone_page_state(zone, NR_ALLOC_BATCH, high_wmark_pages(zone) - low_wmark_pages(zone) - @@ -6489,6 +6499,21 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, return 0; } +int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int rc; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + + if (write) + setup_per_zone_wmarks(); + + return 0; +} + #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) -- cgit v1.2.3 From 444eb2a449ef36fe115431ed7b71467c4563c7f1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 17 Mar 2016 14:19:23 -0700 Subject: mm: thp: set THP defrag by default to madvise and add a stall-free defrag option THP defrag is enabled by default to direct reclaim/compact but not wake kswapd in the event of a THP allocation failure. The problem is that THP allocation requests potentially enter reclaim/compaction. This potentially incurs a severe stall that is not guaranteed to be offset by reduced TLB misses. While there has been considerable effort to reduce the impact of reclaim/compaction, it is still a high cost and workloads that should fit in memory fail to do so. Specifically, a simple anon/file streaming workload will enter direct reclaim on NUMA at least even though the working set size is 80% of RAM. It's been years and it's time to throw in the towel. First, this patch defines THP defrag as follows: madvise: A failed allocation will direct reclaim/compact if the application requests it never: Neither reclaim/compact nor wake kswapd defer: A failed allocation will wake kswapd/kcompactd always: A failed allocation will direct reclaim/compact (historical behaviour) khugepaged defrag will enter direct reclaim but not wake kswapd. Next it sets the default defrag option to be "madvise" to only enter direct reclaim/compaction for applications that specifically requested it. Lastly, it removes a check from the page allocator slowpath that is related to __GFP_THISNODE to allow "defer" to work. The callers that really care are slub/slab and they are updated accordingly. The slab one may be surprising because it also corrects a comment, as kswapd was never woken up by that path. This means that a THP fault will no longer stall for most applications by default, and ideally most users will still get THP if one is immediately available. There are still options for users who prefer a stall at startup of a new application, by either restoring historical behaviour with "always" or picking a half-way point with "defer", where kswapd does some of the work in the background and wakes kcompactd if necessary. THP defrag for khugepaged remains enabled and will enter direct reclaim but not wake kswapd or kcompactd. After this patch, a THP allocation failure will quickly fall back and rely on khugepaged to recover the situation at some time in the future. In some cases, this will reduce THP usage, but the benefit of THP is hard to measure and not a universal win, whereas a stall to reclaim/compaction is definitely measurable and can be painful. 
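As an illustration of what this means for applications (a minimal userspace sketch, not part of the patch; the mapping size is arbitrary and error handling is trimmed): under the new "madvise" default, only regions marked with madvise(MADV_HUGEPAGE) keep the old stall-on-allocation behaviour.

#include <stddef.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16UL << 20;	/* arbitrary; a multiple of the 2M THP size */
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	/*
	 * An MADV_HUGEPAGE region still enters direct reclaim/compaction
	 * on THP allocation failure; unmarked regions now fall back to
	 * base pages immediately and leave the work to khugepaged.
	 */
	madvise(buf, len, MADV_HUGEPAGE);
	return 0;
}
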
The first test for this is using "usemem" to read a large file and write a large anonymous mapping (to avoid the zero page) multiple times. The total size of the mappings is 80% of RAM and the benchmark simply measures how long it takes to complete. It uses multiple threads to see if that is a factor. On UMA, the performance is almost identical so is not reported but on NUMA, we see this usemem 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Amean System-1 102.86 ( 0.00%) 46.81 ( 54.50%) Amean System-4 37.85 ( 0.00%) 34.02 ( 10.12%) Amean System-7 48.12 ( 0.00%) 46.89 ( 2.56%) Amean System-12 51.98 ( 0.00%) 56.96 ( -9.57%) Amean System-21 80.16 ( 0.00%) 79.05 ( 1.39%) Amean System-30 110.71 ( 0.00%) 107.17 ( 3.20%) Amean System-48 127.98 ( 0.00%) 124.83 ( 2.46%) Amean Elapsd-1 185.84 ( 0.00%) 105.51 ( 43.23%) Amean Elapsd-4 26.19 ( 0.00%) 25.58 ( 2.33%) Amean Elapsd-7 21.65 ( 0.00%) 21.62 ( 0.16%) Amean Elapsd-12 18.58 ( 0.00%) 17.94 ( 3.43%) Amean Elapsd-21 17.53 ( 0.00%) 16.60 ( 5.33%) Amean Elapsd-30 17.45 ( 0.00%) 17.13 ( 1.84%) Amean Elapsd-48 15.40 ( 0.00%) 15.27 ( 0.82%) For a single thread, the benchmark completes 43.23% faster with this patch applied, with smaller benefits as the thread count increases. Similarly, notice the large reduction in most cases in system CPU usage. The overall CPU time is 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 User 10357.65 10438.33 System 3988.88 3543.94 Elapsed 2203.01 1634.41 Which is substantial. Now, the reclaim figures 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Minor Faults 128458477 278352931 Major Faults 2174976 225 Swap Ins 16904701 0 Swap Outs 17359627 0 Allocation stalls 43611 0 DMA allocs 0 0 DMA32 allocs 19832646 19448017 Normal allocs 614488453 580941839 Movable allocs 0 0 Direct pages scanned 24163800 0 Kswapd pages scanned 0 0 Kswapd pages reclaimed 0 0 Direct pages reclaimed 20691346 0 Compaction stalls 42263 0 Compaction success 938 0 Compaction failures 41325 0 This patch eliminates almost all swapping and direct reclaim activity. There is still overhead but it's from NUMA balancing which does not identify that it's pointless trying to do anything with this workload. 
I also tried the thpscale benchmark which forces a corner case where compaction can be used heavily and measures the fault latency depending on whether base or huge pages were used thpscale Fault Latencies 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Amean fault-base-1 5288.84 ( 0.00%) 2817.12 ( 46.73%) Amean fault-base-3 6365.53 ( 0.00%) 3499.11 ( 45.03%) Amean fault-base-5 6526.19 ( 0.00%) 4363.06 ( 33.15%) Amean fault-base-7 7142.25 ( 0.00%) 4858.08 ( 31.98%) Amean fault-base-12 13827.64 ( 0.00%) 10292.11 ( 25.57%) Amean fault-base-18 18235.07 ( 0.00%) 13788.84 ( 24.38%) Amean fault-base-24 21597.80 ( 0.00%) 24388.03 (-12.92%) Amean fault-base-30 26754.15 ( 0.00%) 19700.55 ( 26.36%) Amean fault-base-32 26784.94 ( 0.00%) 19513.57 ( 27.15%) Amean fault-huge-1 4223.96 ( 0.00%) 2178.57 ( 48.42%) Amean fault-huge-3 2194.77 ( 0.00%) 2149.74 ( 2.05%) Amean fault-huge-5 2569.60 ( 0.00%) 2346.95 ( 8.66%) Amean fault-huge-7 3612.69 ( 0.00%) 2997.70 ( 17.02%) Amean fault-huge-12 3301.75 ( 0.00%) 6727.02 (-103.74%) Amean fault-huge-18 6696.47 ( 0.00%) 6685.72 ( 0.16%) Amean fault-huge-24 8000.72 ( 0.00%) 9311.43 (-16.38%) Amean fault-huge-30 13305.55 ( 0.00%) 9750.45 ( 26.72%) Amean fault-huge-32 9981.71 ( 0.00%) 10316.06 ( -3.35%) The average time to fault pages is substantially reduced in the majority of cases, but with the obvious caveat that fewer THPs are actually used in this adverse workload 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Percentage huge-1 0.71 ( 0.00%) 14.04 (1865.22%) Percentage huge-3 10.77 ( 0.00%) 33.05 (206.85%) Percentage huge-5 60.39 ( 0.00%) 38.51 (-36.23%) Percentage huge-7 45.97 ( 0.00%) 34.57 (-24.79%) Percentage huge-12 68.12 ( 0.00%) 40.07 (-41.17%) Percentage huge-18 64.93 ( 0.00%) 47.82 (-26.35%) Percentage huge-24 62.69 ( 0.00%) 44.23 (-29.44%) Percentage huge-30 43.49 ( 0.00%) 55.38 ( 27.34%) Percentage huge-32 50.72 ( 0.00%) 51.90 ( 2.35%) 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Minor Faults 37429143 47564000 Major Faults 1916 1558 Swap Ins 1466 1079 Swap Outs 2936863 149626 Allocation stalls 62510 3 DMA allocs 0 0 DMA32 allocs 6566458 6401314 Normal allocs 216361697 216538171 Movable allocs 0 0 Direct pages scanned 25977580 17998 Kswapd pages scanned 0 3638931 Kswapd pages reclaimed 0 207236 Direct pages reclaimed 8833714 88 Compaction stalls 103349 5 Compaction success 270 4 Compaction failures 103079 1 Note again that while this does swap as it's an aggressive workload, the direct reclaim activity and allocation stalls are substantially reduced. There is some kswapd activity but ftrace showed that the kswapd activity was due to normal wakeups from 4K pages being allocated. Compaction-related stalls and activity are almost eliminated. I also tried the stutter benchmark. For this, I do not have figures for NUMA but it's something that does impact UMA so I'll report what is available stutter 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Min mmap 7.3571 ( 0.00%) 7.3438 ( 0.18%) 1st-qrtle mmap 7.5278 ( 0.00%) 17.9200 (-138.05%) 2nd-qrtle mmap 7.6818 ( 0.00%) 21.6055 (-181.25%) 3rd-qrtle mmap 11.0889 ( 0.00%) 21.8881 (-97.39%) Max-90% mmap 27.8978 ( 0.00%) 22.1632 ( 20.56%) Max-93% mmap 28.3202 ( 0.00%) 22.3044 ( 21.24%) Max-95% mmap 28.5600 ( 0.00%) 22.4580 ( 21.37%) Max-99% mmap 29.6032 ( 0.00%) 25.5216 ( 13.79%) Max mmap 4109.7289 ( 0.00%) 4813.9832 (-17.14%) Mean mmap 12.4474 ( 0.00%) 19.3027 (-55.07%) This benchmark is trying to fault an anonymous mapping while there is a heavy IO load -- a scenario that desktop users used to complain about frequently. 
This shows a mix because the ideal case of mapping with THP is not hit as often. However, note that 99% of the mappings complete 13.79% faster. The CPU usage here is particularly interesting 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 User 67.50 0.99 System 1327.88 91.30 Elapsed 2079.00 2128.98 And once again we look at the reclaim figures 4.4.0 4.4.0 kcompactd-v1r1 nodefrag-v1r3 Minor Faults 335241922 1314582827 Major Faults 715 819 Swap Ins 0 0 Swap Outs 0 0 Allocation stalls 532723 0 DMA allocs 0 0 DMA32 allocs 1822364341 1177950222 Normal allocs 1815640808 1517844854 Movable allocs 0 0 Direct pages scanned 21892772 0 Kswapd pages scanned 20015890 41879484 Kswapd pages reclaimed 19961986 41822072 Direct pages reclaimed 21892741 0 Compaction stalls 1065755 0 Compaction success 514 0 Compaction failures 1065241 0 Allocation stalls and all direct reclaim activity are eliminated, as are compaction-related stalls. THP gives impressive gains in some cases but only if they are quickly available. We're not going to reach the point where they are completely free so let's take the costs out of the fast paths finally and defer the cost to kswapd, kcompactd and khugepaged where it belongs. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/transhuge.txt | 17 +++++++ include/linux/gfp.h | 2 +- include/linux/huge_mm.h | 9 +--- mm/huge_memory.c | 101 +++++++++++++++++++++++++++-------------- mm/page_alloc.c | 8 ---- mm/slab.c | 8 ++-- mm/slub.c | 2 +- 7 files changed, 91 insertions(+), 56 deletions(-) (limited to 'Documentation') diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 0dc8632aa01e..d9cb65cf5cfd 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -113,9 +113,26 @@ guaranteed, but it may be more likely in case the allocation is for a MADV_HUGEPAGE region. echo always >/sys/kernel/mm/transparent_hugepage/defrag +echo defer >/sys/kernel/mm/transparent_hugepage/defrag echo madvise >/sys/kernel/mm/transparent_hugepage/defrag echo never >/sys/kernel/mm/transparent_hugepage/defrag +"always" means that an application requesting THP will stall on allocation +failure and directly reclaim pages and compact memory in an effort to +allocate a THP immediately. This may be desirable for virtual machines +that benefit heavily from THP use and are willing to delay the VM start +to utilise them. + +"defer" means that an application will wake kswapd in the background +to reclaim pages and wake kcompactd to compact memory so that THP is +available in the near future. It's the responsibility of khugepaged +to then install the THP pages later. + +"madvise" will enter direct reclaim like "always" but only for regions +that have used madvise(MADV_HUGEPAGE). This is the default behaviour. + +"never" should be self-explanatory. + By default kernel tries to use huge zero page on read page fault. 
It's possible to disable huge zero page by writing 0 or enable it back by writing 1: diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c083d0820a87..11d56c6e7ef2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -257,7 +257,7 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ - ~__GFP_KSWAPD_RECLAIM) + ~__GFP_RECLAIM) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 459fd25b378e..a4cecb4801ec 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -41,7 +41,8 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, - TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, @@ -71,12 +72,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); ((__vma)->vm_flags & VM_HUGEPAGE))) && \ !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ !is_vma_temporary_stack(__vma)) -#define transparent_hugepage_defrag(__vma) \ - ((transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \ - (__vma)->vm_flags & VM_HUGEPAGE)) #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c --- a/mm/huge_memory.c +++ b/mm/huge_memory.c -static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) +static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) +{ + gfp_t reclaim_flags = 0; + + if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags) && + (vma->vm_flags & VM_HUGEPAGE)) + reclaim_flags = __GFP_DIRECT_RECLAIM; + else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) + reclaim_flags = __GFP_KSWAPD_RECLAIM; + else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) + reclaim_flags = __GFP_DIRECT_RECLAIM; + + return GFP_TRANSHUGE | reclaim_flags; +} + +/* Defrag for khugepaged will enter direct reclaim/compaction if necessary */ +static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void) { - return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp; + return GFP_TRANSHUGE | (khugepaged_defrag() ? __GFP_DIRECT_RECLAIM : 0); } /* Caller must hold page table lock. 
*/ @@ -919,7 +950,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, } return ret; } - gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + gfp = alloc_hugepage_direct_gfpmask(vma); page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); @@ -1279,7 +1310,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, alloc: if (transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) { - huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + huge_gfp = alloc_hugepage_direct_gfpmask(vma); new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); } else new_page = NULL; @@ -2249,11 +2280,12 @@ static int khugepaged_find_target_node(void) return 0; } -static inline struct page *alloc_hugepage(int defrag) +static inline struct page *alloc_khugepaged_hugepage(void) { struct page *page; - page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0), HPAGE_PMD_ORDER); + page = alloc_pages(alloc_hugepage_khugepaged_gfpmask(), + HPAGE_PMD_ORDER); if (page) prep_transhuge_page(page); return page; @@ -2264,7 +2296,7 @@ static struct page *khugepaged_alloc_hugepage(bool *wait) struct page *hpage; do { - hpage = alloc_hugepage(khugepaged_defrag()); + hpage = alloc_khugepaged_hugepage(); if (!hpage) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); if (!*wait) @@ -2335,8 +2367,7 @@ static void collapse_huge_page(struct mm_struct *mm, VM_BUG_ON(address & ~HPAGE_PMD_MASK); /* Only allocate from the target node */ - gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) | - __GFP_THISNODE; + gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE; /* release the mmap_sem read lock. */ new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d156310aedeb..096a00d98a45 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3119,14 +3119,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; - /* - * If this allocation cannot block and it is for a specific node, then - * fail early. There's no need to wakeup kswapd or retry for a - * speculative node-specific allocation. - */ - if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim) - goto nopage; - retry: if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); diff --git a/mm/slab.c b/mm/slab.c index 56dd0df2a8ce..e1f6c27c3db5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -670,7 +670,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static inline gfp_t gfp_exact_node(gfp_t flags) { - return flags; + return flags & ~__GFP_NOFAIL; } #else /* CONFIG_NUMA */ @@ -841,12 +841,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } /* - * Construct gfp mask to allocate from a specific node but do not direct reclaim - * or warn about failures. kswapd may still wake to reclaim in the background. + * Construct gfp mask to allocate from a specific node but do not reclaim or + * warn about failures. 
*/ static inline gfp_t gfp_exact_node(gfp_t flags) { - return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; + return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL); } #endif diff --git a/mm/slub.c b/mm/slub.c index 2f2f04d39104..64ed5f3a3046 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1426,7 +1426,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) - alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM; + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL); page = alloc_slab_page(s, alloc_gfp, node, oo); if (unlikely(!page)) { -- cgit v1.2.3 From b6e6edcfa40561e9c8abe5eecf1c96f8e5fd9c6f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 17 Mar 2016 14:20:28 -0700 Subject: mm: memcontrol: reclaim and OOM kill when shrinking memory.max below usage Setting the original memory.limit_in_bytes hardlimit is subject to a race condition when the desired value is below the current usage. The code tries a few times to first reclaim and then see if the usage has dropped to where we would like it to be, but there is no locking, and the workload is free to continue making new charges up to the old limit. Thus, attempting to shrink a workload relies on pure luck and hope that the workload happens to cooperate. To fix this in the cgroup2 memory.max knob, do it the other way round: set the limit first, then try enforcement. And if reclaim is not able to succeed, trigger OOM kills in the group. Keep going until the new limit is met, we run out of OOM victims and there's only unreclaimable memory left, or the task writing to memory.max is killed. This allows users to shrink groups reliably, and the behavior is consistent with what happens when new charges are attempted in excess of memory.max. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 6 ++++++ mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index e2f4e7948a66..8f1329a5f700 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -1387,6 +1387,12 @@ system than killing the group. Otherwise, memory.max is there to limit this type of spillover and ultimately contain buggy or even malicious applications. +Setting the original memory.limit_in_bytes below the current usage was +subject to a race condition, where concurrent charges could cause the +limit setting to fail. memory.max on the other hand will first set the +limit to prevent new charges, and then reclaim and OOM kill until the +new limit is met - or the task writing to memory.max is killed. + The combined memory+swap accounting and limiting is replaced by real control over swap space. 
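To illustrate the new semantics from userspace (a hedged sketch, not part of the patch; the /sys/fs/cgroup mount point and the group name are assumptions): a management daemon shrinking a group should expect the write to block while reclaim and OOM kills proceed, and to fail with EINTR if the writer itself is signalled or killed.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Shrink a cgroup2 group's memory.max; returns 0 or a negative errno. */
static int set_memory_max(const char *group, unsigned long long bytes)
{
	char path[256], val[32];
	int fd, ret = 0;

	/* /sys/fs/cgroup is the conventional cgroup2 mount point */
	snprintf(path, sizeof(path), "/sys/fs/cgroup/%s/memory.max", group);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	snprintf(val, sizeof(val), "%llu", bytes);
	/*
	 * May block while the kernel reclaims and OOM kills; fails with
	 * -EINTR if this task is signalled before usage drops below the
	 * new limit.
	 */
	if (write(fd, val, strlen(val)) < 0)
		ret = -errno;
	close(fd);
	return ret;
}
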
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f7c9b4cbdf01..8614e0d750e5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1236,7 +1236,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) return limit; } -static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { struct oom_control oc = { @@ -1314,6 +1314,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, } unlock: mutex_unlock(&oom_lock); + return chosen; } #if MAX_NUMNODES > 1 @@ -5029,6 +5030,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES; + bool drained = false; unsigned long max; int err; @@ -5037,9 +5040,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (err) return err; - err = mem_cgroup_resize_limit(memcg, max); - if (err) - return err; + xchg(&memcg->memory.limit, max); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + + if (nr_pages <= max) + break; + + if (signal_pending(current)) { + err = -EINTR; + break; + } + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + if (nr_reclaims) { + if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, + GFP_KERNEL, true)) + nr_reclaims--; + continue; + } + + mem_cgroup_events(memcg, MEMCG_OOM, 1); + if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) + break; + } memcg_wb_domain_size_changed(memcg); return nbytes; -- cgit v1.2.3 From 5de23d435e88996b1efe0e2cebe242074ce67c9e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 17 Mar 2016 14:20:54 -0700 Subject: proc: add /proc/<pid>/timerslack_ns interface This patch provides a proc/PID/timerslack_ns interface which exposes a task's timerslack value in nanoseconds and allows it to be changed. This allows power/performance management software to set timer slack for other threads according to its policy for the thread (such as when the thread is designated foreground vs. background activity). If the value written is non-zero, slack is set to that value. Otherwise, it is set to the default for the thread. This interface checks that the calling task has permission to use PTRACE_MODE_ATTACH_FSCREDS on the target task, so that we can ensure arbitrary apps do not change the timer slack for other apps. 
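For illustration (a hedged userspace sketch, not part of the patch; the 50us value is an arbitrary choice): given PTRACE_MODE_ATTACH_FSCREDS permission on the target, setting or resetting a task's slack through the new file could look like this.

#include <stdio.h>
#include <sys/types.h>

/* Write a slack value in nanoseconds; 0 restores the task's default. */
static int set_timerslack(pid_t pid, unsigned long long slack_ns)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/timerslack_ns", (int)pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%llu", slack_ns);	/* parsed with kstrtoull, base 10 */
	return fclose(f) ? -1 : 0;
}

/* e.g. set_timerslack(pid, 50000ULL) lets the task's timers be deferred by up to 50us */
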
Signed-off-by: John Stultz Acked-by: Kees Cook Cc: Arjan van de Ven Cc: Thomas Gleixner Cc: Oren Laadan Cc: Ruchi Kandoi Cc: Rom Lemarchand Cc: Android Kernel Team Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 18 ++++++++++ fs/proc/base.c | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 843b045b4069..7f5607a089b4 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -43,6 +43,7 @@ Table of Contents 3.7 /proc/<pid>/task/<tid>/children - Information about task children 3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file 3.9 /proc/<pid>/map_files - Information about memory mapped files + 3.10 /proc/<pid>/timerslack_ns - Task timerslack value 4 Configuring procfs 4.1 Mount options @@ -1862,6 +1863,23 @@ time one can open(2) mappings from the listings of two processes and comparing their inode numbers to figure out which anonymous memory areas are actually shared. +3.10 /proc/<pid>/timerslack_ns - Task timerslack value +--------------------------------------------------------- +This file provides the value of the task's timerslack in nanoseconds. +This value specifies an amount of time that normal timers may be deferred +in order to coalesce timers and avoid unnecessary wakeups. + +This allows a task's interactivity vs power consumption trade-off to be +adjusted. + +Writing 0 to the file will set the task's timerslack to the default value. + +Valid values are from 0 to ULLONG_MAX. + +An application setting the value must have PTRACE_MODE_ATTACH_FSCREDS level +permissions on the task specified to change its timerslack_ns value. + + ------------------------------------------------------------------------------ Configuring procfs ------------------------------------------------------------------------------ diff --git a/fs/proc/base.c b/fs/proc/base.c index 4f764c2ac1a5..35f583ad7dbe 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2257,6 +2257,72 @@ static const struct file_operations proc_timers_operations = { .release = seq_release_private, }; +static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + u64 slack_ns; + int err; + + err = kstrtoull_from_user(buf, count, 10, &slack_ns); + if (err < 0) + return err; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) { + task_lock(p); + if (slack_ns == 0) + p->timer_slack_ns = p->default_timer_slack_ns; + else + p->timer_slack_ns = slack_ns; + task_unlock(p); + } else + count = -EPERM; + + put_task_struct(p); + + return count; +} + +static int timerslack_ns_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + int err = 0; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) { + task_lock(p); + seq_printf(m, "%llu\n", p->timer_slack_ns); + task_unlock(p); + } else + err = -EPERM; + + put_task_struct(p); + + return err; +} + +static int timerslack_ns_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, timerslack_ns_show, inode); +} + +static const struct file_operations proc_pid_set_timerslack_ns_operations = { + .open = timerslack_ns_open, + .read = seq_read, + .write = timerslack_ns_write, + .llseek = seq_lseek, 
.release = single_release, +}; + static int proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -2831,6 +2897,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), #endif + REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) -- cgit v1.2.3 From 93e205a728e6cb8d7d11f6836e289798a1de25e2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 17 Mar 2016 14:21:15 -0700 Subject: fix Christoph's email addresses There are various email addresses for me throughout the kernel. Use the one that will always be valid. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v1/cgroups.txt | 2 +- Documentation/cgroup-v1/cpusets.txt | 2 +- MAINTAINERS | 2 +- arch/ia64/include/asm/rwsem.h | 2 +- include/linux/quicklist.h | 2 +- mm/mmu_notifier.c | 2 +- mm/quicklist.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt index c6256ae9885b..947e6fe31ef9 100644 --- a/Documentation/cgroup-v1/cgroups.txt +++ b/Documentation/cgroup-v1/cgroups.txt @@ -8,7 +8,7 @@ Original copyright statements from cpusets.txt: Portions Copyright (C) 2004 BULL SA. Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson -Modified by Christoph Lameter +Modified by Christoph Lameter CONTENTS: ========= diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt index fdf7dff3f607..e5cdcd445615 100644 --- a/Documentation/cgroup-v1/cpusets.txt +++ b/Documentation/cgroup-v1/cpusets.txt @@ -6,7 +6,7 @@ Written by Simon.Derr@bull.net Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson -Modified by Christoph Lameter +Modified by Christoph Lameter Modified by Paul Menage Modified by Hidetoshi Seto diff --git a/MAINTAINERS b/MAINTAINERS index 99bd725affc6..67d34bb7335c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8473,7 +8473,7 @@ F: include/crypto/pcrypt.h PER-CPU MEMORY ALLOCATOR M: Tejun Heo -M: Christoph Lameter +M: Christoph Lameter T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git S: Maintained F: include/linux/percpu*.h diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 3027e7516d85..ce112472bdd6 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -3,7 +3,7 @@ * * Copyright (C) 2003 Ken Chen * Copyright (C) 2003 Asit Mallick - * Copyright (C) 2005 Christoph Lameter + * Copyright (C) 2005 Christoph Lameter * * Based on asm-i386/rwsem.h and other architecture implementation. * diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h index bd466439c588..3bdfa70bc642 100644 --- a/include/linux/quicklist.h +++ b/include/linux/quicklist.h @@ -5,7 +5,7 @@ * as needed after allocation when they are freed. Per cpu lists of pages * are kept that only contain node local pages. * - * (C) 2007, SGI. Christoph Lameter + * (C) 2007, SGI. Christoph Lameter */ #include #include diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 5fbdd367bbed..f4259e496f83 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Qumranet, Inc. 
* Copyright (C) 2008 SGI - * Christoph Lameter + * Christoph Lameter * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. diff --git a/mm/quicklist.c b/mm/quicklist.c index 942212970529..daf6ff6e199a 100644 --- a/mm/quicklist.c +++ b/mm/quicklist.c @@ -8,7 +8,7 @@ * improved on it. * * Copyright (C) 2007 SGI, - * Christoph Lameter + * Christoph Lameter * Generalized, added support for multiple lists and * constructors / destructors. */ -- cgit v1.2.3