From f245e1c17a702964ad552878d01a10e53cf0e8e5 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 8 May 2017 15:54:55 -0700 Subject: fs/proc/inode.c: remove cast from memory allocation Coccinelle emits this warning: WARNING: casting value returned by memory allocation function to (struct proc_inode *) is useless. Remove unnecessary cast. Link: http://lkml.kernel.org/r/1487745720-16967-1-git-send-email-me@tobin.cc Signed-off-by: Tobin C. Harding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2cc7a8030275..e250910cffc8 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -58,7 +58,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) struct proc_inode *ei; struct inode *inode; - ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; -- cgit v1.2.3 From 7fe6a42e87ef20edd7faf1b2f4bf18d95922d1e4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 May 2017 15:56:08 -0700 Subject: reiserfs: use designated initializers Prepare to mark sensitive kernel structures for randomization by making sure they're using designated initializers. These were identified during allyesconfig builds of x86, arm, and arm64, with most initializer fixes extracted from grsecurity. Link: http://lkml.kernel.org/r/20170329210419.GA40066@beast Signed-off-by: Kees Cook Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/item_ops.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index aca73dd73906..e3c558d1b78c 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -724,18 +724,18 @@ static void errcatch_print_vi(struct virtual_item *vi) } static struct item_operations errcatch_ops = { - errcatch_bytes_number, - errcatch_decrement_key, - errcatch_is_left_mergeable, - errcatch_print_item, - errcatch_check_item, - - errcatch_create_vi, - errcatch_check_left, - errcatch_check_right, - errcatch_part_size, - errcatch_unit_num, - errcatch_print_vi + .bytes_number = errcatch_bytes_number, + .decrement_key = errcatch_decrement_key, + .is_left_mergeable = errcatch_is_left_mergeable, + .print_item = errcatch_print_item, + .check_item = errcatch_check_item, + + .create_vi = errcatch_create_vi, + .check_left = errcatch_check_left, + .check_right = errcatch_check_right, + .part_size = errcatch_part_size, + .unit_num = errcatch_unit_num, + .print_vi = errcatch_print_vi }; #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) -- cgit v1.2.3 From 25b14e92af1a563c7331466ca59188f88050bbf0 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 8 May 2017 15:56:38 -0700 Subject: ns: allow ns_entries to have custom symlink content Patch series "Expose task pid_ns_for_children to userspace". pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside. It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks, that do setns(CLONE_NEWPID) in proccess of their work. If they have a custom pid_ns_for_children before dump, they must have the same ns after restore. Otherwise, restored task bumped into enviroment it does not expect. This patchset solves the problem. It exposes pid_ns_for_children to ns directory in standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] This patch (of 2): Make possible to have link content prefix yyy different from the link name xxx: $ readlink /proc/[pid]/ns/xxx yyy:[4026531838] This will be used in next patch. Link: http://lkml.kernel.org/r/149201120318.6007.7362655181033883000.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai Reviewed-by: Cyrill Gorcunov Acked-by: Andrei Vagin Cc: Andreas Gruenbacher Cc: Kees Cook Cc: Michael Kerrisk Cc: Al Viro Cc: Oleg Nesterov Cc: Paul Moore Cc: Eric Biederman Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nsfs.c | 4 +++- include/linux/proc_ns.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nsfs.c b/fs/nsfs.c index 323f492e0822..f3db56e83dd2 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -196,9 +196,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task, { struct ns_common *ns; int res = -ENOENT; + const char *name; ns = ns_ops->get(task); if (ns) { - res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); + name = ns_ops->real_ns_name ? : ns_ops->name; + res = snprintf(buf, size, "%s:[%u]", name, ns->inum); ns_ops->put(ns); } return res; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 12cb8bd81d2d..88dba3b53375 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -14,6 +14,7 @@ struct inode; struct proc_ns_operations { const char *name; + const char *real_ns_name; int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); -- cgit v1.2.3 From eaa0d190bfe1ed891b814a52712dcd852554cb08 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 8 May 2017 15:56:41 -0700 Subject: pidns: expose task pid_ns_for_children to userspace pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside. It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks, that do setns(CLONE_NEWPID) in proccess of their work. This patch solves the problem, and it exposes pid_ns_for_children to ns directory in standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai Cc: Andrei Vagin Cc: Andreas Gruenbacher Cc: Kees Cook Cc: Michael Kerrisk Cc: Al Viro Cc: Oleg Nesterov Cc: Paul Moore Cc: Eric Biederman Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/namespaces.c | 1 + include/linux/proc_ns.h | 1 + kernel/pid_namespace.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'fs') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 766f0c637ad1..3803b24ca220 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = { #endif #ifdef CONFIG_PID_NS &pidns_operations, + &pidns_for_children_operations, #endif #ifdef CONFIG_USER_NS &userns_operations, diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 88dba3b53375..58ab28d81fc2 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -27,6 +27,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; extern const struct proc_ns_operations cgroupns_operations; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index de461aa0bf9a..d1f3e9f558b8 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -374,6 +374,29 @@ static struct ns_common *pidns_get(struct task_struct *task) return ns ? &ns->ns : NULL; } +static struct ns_common *pidns_for_children_get(struct task_struct *task) +{ + struct pid_namespace *ns = NULL; + + task_lock(task); + if (task->nsproxy) { + ns = task->nsproxy->pid_ns_for_children; + get_pid_ns(ns); + } + task_unlock(task); + + if (ns) { + read_lock(&tasklist_lock); + if (!ns->child_reaper) { + put_pid_ns(ns); + ns = NULL; + } + read_unlock(&tasklist_lock); + } + + return ns ? &ns->ns : NULL; +} + static void pidns_put(struct ns_common *ns) { put_pid_ns(to_pid_ns(ns)); @@ -443,6 +466,17 @@ const struct proc_ns_operations pidns_operations = { .get_parent = pidns_get_parent, }; +const struct proc_ns_operations pidns_for_children_operations = { + .name = "pid_for_children", + .real_ns_name = "pid", + .type = CLONE_NEWPID, + .get = pidns_for_children_get, + .put = pidns_put, + .install = pidns_install, + .owner = pidns_owner, + .get_parent = pidns_get_parent, +}; + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); -- cgit v1.2.3 From a7c3e901a46ff54c016d040847eda598a9e3e653 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:09 -0700 Subject: mm: introduce kv[mz]alloc helpers Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantic of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke a really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As it can be seen implementing kvmalloc requires quite an intimate knowledge if the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers, I could find, have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to convert them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly. This implementation makes sure to not make a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also to not warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more approapriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. ext4_kvmalloc, libcfs_kvzalloc) because those seems to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document that __vmalloc{_node} about unsupported gfp mask because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Stephen Rothwell Reviewed-by: Andreas Dilger [ext4 part] Acked-by: Vlastimil Babka Cc: John Hubbard Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/lapic.c | 4 ++-- arch/x86/kvm/page_track.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- drivers/md/dm-stats.c | 7 +----- fs/ext4/mballoc.c | 2 +- fs/ext4/super.c | 4 ++-- fs/f2fs/f2fs.h | 20 ----------------- fs/f2fs/file.c | 4 ++-- fs/f2fs/node.c | 6 +++--- fs/f2fs/segment.c | 14 ++++++------ fs/seq_file.c | 16 +------------- include/linux/kvm_host.h | 2 -- include/linux/mm.h | 14 ++++++++++++ include/linux/vmalloc.h | 1 + ipc/util.c | 7 +----- mm/nommu.c | 5 +++++ mm/util.c | 45 +++++++++++++++++++++++++++++++++++++++ mm/vmalloc.c | 9 +++++++- security/apparmor/apparmorfs.c | 2 +- security/apparmor/include/lib.h | 11 ---------- security/apparmor/lib.c | 30 -------------------------- security/apparmor/match.c | 2 +- security/apparmor/policy_unpack.c | 2 +- virt/kvm/kvm_main.c | 18 +++------------- 24 files changed, 103 insertions(+), 130 deletions(-) (limited to 'fs') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bad6a25067bc..d2a892fc92bf 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -177,8 +177,8 @@ static void recalculate_apic_map(struct kvm *kvm) if (kvm_apic_present(vcpu)) max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); - new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + - sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); + new = kvzalloc(sizeof(struct kvm_apic_map) + + sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL); if (!new) goto out; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 60168cdd0546..ea67dc876316 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -40,8 +40,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, int i; for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - slot->arch.gfn_track[i] = kvm_kvzalloc(npages * - sizeof(*slot->arch.gfn_track[i])); + slot->arch.gfn_track[i] = kvzalloc(npages * + sizeof(*slot->arch.gfn_track[i]), GFP_KERNEL); if (!slot->arch.gfn_track[i]) goto track_free; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ccbd45ecd41a..ee22226e3807 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8199,13 +8199,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, slot->base_gfn, level) + 1; slot->arch.rmap[i] = - kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i])); + kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL); if (!slot->arch.rmap[i]) goto out_free; if (i == 0) continue; - linfo = kvm_kvzalloc(lpages * sizeof(*linfo)); + linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL); if (!linfo) goto out_free; diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 0250e7e521ab..6028d8247f58 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -146,12 +146,7 @@ static void *dm_kvzalloc(size_t alloc_size, int node) if (!claim_shared_memory(alloc_size)) return NULL; - if (alloc_size <= KMALLOC_MAX_SIZE) { - p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node); - if (p) - return p; - } - p = vzalloc_node(alloc_size, node); + p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node); if (p) return p; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 354dc1a894c2..b60698c104fd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2393,7 +2393,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) return 0; size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); - new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + new_groupinfo = kvzalloc(size, GFP_KERNEL); if (!new_groupinfo) { ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a9c72e39a4ee..b2c74644d5de 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2153,7 +2153,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) return 0; size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = ext4_kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(size, GFP_KERNEL); if (!new_groups) { ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size / (int) sizeof(struct flex_groups)); @@ -3887,7 +3887,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = ext4_kvmalloc(db_count * + sbi->s_group_desc = kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a6e115562f6..1fc17a1fc5d0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2005,26 +2005,6 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } -static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kmalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags, PAGE_KERNEL); - return ret; -} - -static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} - #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5f7317875a67..0849af78381f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1012,11 +1012,11 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 481aa8dc79f4..0ea1dca8a0e2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2621,17 +2621,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); - nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; - nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8, + nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; - nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * sizeof(unsigned short), GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29ef7088c558..13806f642ab5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2501,13 +2501,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -2540,7 +2540,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -2591,12 +2591,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -2764,7 +2764,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -2786,7 +2786,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } diff --git a/fs/seq_file.c b/fs/seq_file.c index ca69fb99e41a..dc7c2be963ed 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -25,21 +25,7 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { - void *buf; - gfp_t gfp = GFP_KERNEL; - - /* - * For high order allocations, use __GFP_NORETRY to avoid oom-killing - - * it's better to fall back to vmalloc() than to kill things. For small - * allocations, just use GFP_KERNEL which will oom kill, thus no need - * for vmalloc fallback. - */ - if (size > PAGE_SIZE) - gfp |= __GFP_NORETRY | __GFP_NOWARN; - buf = kmalloc(size, gfp); - if (!buf && size > PAGE_SIZE) - buf = vmalloc(size); - return buf; + return kvmalloc(size, GFP_KERNEL); } /** diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d0250744507a..5d9b2a08e553 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -767,8 +767,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void *kvm_kvzalloc(unsigned long size); - #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 5d22e69f51ea..08e2849d27ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -518,6 +518,20 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d68edffbf142..46991ad3ddd5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,6 +80,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/ipc/util.c b/ipc/util.c index 3459a16a9df9..caec7b1bfaa3 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -403,12 +403,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) */ void *ipc_alloc(int size) { - void *out; - if (size > PAGE_SIZE) - out = vmalloc(size); - else - out = kmalloc(size, GFP_KERNEL); - return out; + return kvmalloc(size, GFP_KERNEL); } /** diff --git a/mm/nommu.c b/mm/nommu.c index 2d131b97a851..a80411d258fc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -237,6 +237,11 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); +void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) +{ + return __vmalloc(size, flags, PAGE_KERNEL); +} + void *vmalloc_user(unsigned long size) { void *ret; diff --git a/mm/util.c b/mm/util.c index 656dc5e37a87..10a14a0ac3c2 100644 --- a/mm/util.c +++ b/mm/util.c @@ -329,6 +329,51 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +/** + * kvmalloc_node - attempt to allocate physically contiguous memory, but upon + * failure, fall back to non-contiguous (vmalloc) allocation. + * @size: size of the request. + * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. + * @node: numa node to allocate from + * + * Uses kmalloc to get the memory but if the allocation fails then falls back + * to the vmalloc allocator. Use kvfree for freeing the memory. + * + * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. + */ +void *kvmalloc_node(size_t size, gfp_t flags, int node) +{ + gfp_t kmalloc_flags = flags; + void *ret; + + /* + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) + * so the given set of flags has to be compatible. + */ + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); + + /* + * Make sure that larger requests are not too disruptive - no OOM + * killer and no allocation failure warnings as we have a fallback + */ + if (size > PAGE_SIZE) + kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN; + + ret = kmalloc_node(size, kmalloc_flags, node); + + /* + * It doesn't really make sense to fallback to vmalloc for sub page + * requests + */ + if (ret || size <= PAGE_SIZE) + return ret; + + return __vmalloc_node_flags(size, node, flags | __GFP_HIGHMEM); +} +EXPORT_SYMBOL(kvmalloc_node); + void kvfree(const void *addr) { if (is_vmalloc_addr(addr)) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b52aeed3f58e..33603239560e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1786,6 +1786,13 @@ fail: * Allocate enough pages to cover @size from the page level * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. + * + * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT + * and __GFP_NOFAIL are not supported + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted + * with mm people. + * */ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, @@ -1802,7 +1809,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); -static inline void *__vmalloc_node_flags(unsigned long size, +void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) { return __vmalloc_node(size, 1, flags, PAGE_KERNEL, diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 41073f70eb41..be0b49897a67 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -98,7 +98,7 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, return ERR_PTR(-ESPIPE); /* freed by caller to simple_write_to_buffer */ - data = kvmalloc(sizeof(*data) + alloc_size); + data = kvmalloc(sizeof(*data) + alloc_size, GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); kref_init(&data->count); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 0291ff3902f9..550a700563b4 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -64,17 +64,6 @@ char *aa_split_fqname(char *args, char **ns_name); const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len); void aa_info_message(const char *str); -void *__aa_kvmalloc(size_t size, gfp_t flags); - -static inline void *kvmalloc(size_t size) -{ - return __aa_kvmalloc(size, 0); -} - -static inline void *kvzalloc(size_t size) -{ - return __aa_kvmalloc(size, __GFP_ZERO); -} /** * aa_strneq - compare null terminated @str to a non null terminated substring diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 32cafc12593e..7cd788a9445b 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -128,36 +128,6 @@ void aa_info_message(const char *str) printk(KERN_INFO "AppArmor: %s\n", str); } -/** - * __aa_kvmalloc - do allocation preferring kmalloc but falling back to vmalloc - * @size: how many bytes of memory are required - * @flags: the type of memory to allocate (see kmalloc). - * - * Return: allocated buffer or NULL if failed - * - * It is possible that policy being loaded from the user is larger than - * what can be allocated by kmalloc, in those cases fall back to vmalloc. - */ -void *__aa_kvmalloc(size_t size, gfp_t flags) -{ - void *buffer = NULL; - - if (size == 0) - return NULL; - - /* do not attempt kmalloc if we need more than 16 pages at once */ - if (size <= (16*PAGE_SIZE)) - buffer = kmalloc(size, flags | GFP_KERNEL | __GFP_NORETRY | - __GFP_NOWARN); - if (!buffer) { - if (flags & __GFP_ZERO) - buffer = vzalloc(size); - else - buffer = vmalloc(size); - } - return buffer; -} - /** * aa_policy_init - initialize a policy structure * @policy: policy to initialize (NOT NULL) diff --git a/security/apparmor/match.c b/security/apparmor/match.c index eb0efef746f5..960c913381e2 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -88,7 +88,7 @@ static struct table_header *unpack_table(char *blob, size_t bsize) if (bsize < tsize) goto out; - table = kvzalloc(tsize); + table = kvzalloc(tsize, GFP_KERNEL); if (table) { table->td_id = th.td_id; table->td_flags = th.td_flags; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 2e37c9c26bbd..f3422a91353c 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -487,7 +487,7 @@ fail: static void *kvmemdup(const void *src, size_t len) { - void *p = kvmalloc(len); + void *p = kvmalloc(len, GFP_KERNEL); if (p) memcpy(p, src, len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 88257b311cb5..aca22d36be9c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -504,7 +504,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) int i; struct kvm_memslots *slots; - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) return NULL; @@ -689,18 +689,6 @@ out_err_no_disable: return ERR_PTR(r); } -/* - * Avoid using vmalloc for a small buffer. - * Should not be used when the size is statically known. - */ -void *kvm_kvzalloc(unsigned long size) -{ - if (size > PAGE_SIZE) - return vzalloc(size); - else - return kzalloc(size, GFP_KERNEL); -} - static void kvm_destroy_devices(struct kvm *kvm) { struct kvm_device *dev, *tmp; @@ -782,7 +770,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); + memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); if (!memslot->dirty_bitmap) return -ENOMEM; @@ -1008,7 +996,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) goto out_free; memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); -- cgit v1.2.3 From 81be3dee96346fbe08c31be5ef74f03f6b63cf68 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:24 -0700 Subject: fs/xattr.c: zero out memory copied to userspace in getxattr getxattr uses vmalloc to allocate memory if kzalloc fails. This is filled by vfs_getxattr and then copied to the userspace. vmalloc, however, doesn't zero out the memory so if the specific implementation of the xattr handler is sloppy we can theoretically expose a kernel memory. There is no real sign this is really the case but let's make sure this will not happen and use vzalloc instead. Fixes: 779302e67835 ("fs/xattr.c:getxattr(): improve handling of allocation failures") Link: http://lkml.kernel.org/r/20170306103327.2766-1-mhocko@kernel.org Acked-by: Kees Cook Reported-by: Vlastimil Babka Signed-off-by: Michal Hocko Cc: [3.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 7e3317cf4045..94f49a082dd2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -530,7 +530,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, size = XATTR_SIZE_MAX; kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!kvalue) { - kvalue = vmalloc(size); + kvalue = vzalloc(size); if (!kvalue) return -ENOMEM; } -- cgit v1.2.3 From 752ade68cbd81d0321dfecc188f655a945551b25 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:27 -0700 Subject: treewide: use kv[mz]alloc* rather than opencoded variants There are many code paths opencoding kvmalloc. Let's use the helper instead. The main difference to kvmalloc is that those users are usually not considering all the aspects of the memory allocator. E.g. allocation requests <= 32kB (with 4kB pages) are basically never failing and invoke OOM killer to satisfy the allocation. This sounds too disruptive for something that has a reasonable fallback - the vmalloc. On the other hand those requests might fallback to vmalloc even when the memory allocator would succeed after several more reclaim/compaction attempts previously. There is no guarantee something like that happens though. This patch converts many of those places to kv[mz]alloc* helpers because they are more conservative. Link: http://lkml.kernel.org/r/20170306103327.2766-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Boris Ostrovsky # Xen bits Acked-by: Kees Cook Acked-by: Vlastimil Babka Acked-by: Andreas Dilger # Lustre Acked-by: Christian Borntraeger # KVM/s390 Acked-by: Dan Williams # nvdim Acked-by: David Sterba # btrfs Acked-by: Ilya Dryomov # Ceph Acked-by: Tariq Toukan # mlx4 Acked-by: Leon Romanovsky # mlx5 Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Herbert Xu Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: "Rafael J. Wysocki" Cc: Ben Skeggs Cc: Kent Overstreet Cc: Santosh Raspatur Cc: Hariprasad S Cc: Yishai Hadas Cc: Oleg Drokin Cc: "Yan, Zheng" Cc: Alexander Viro Cc: Alexei Starovoitov Cc: Eric Dumazet Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kvm/kvm-s390.c | 10 ++----- crypto/lzo.c | 4 +-- drivers/acpi/apei/erst.c | 8 ++---- drivers/char/agp/generic.c | 8 +----- drivers/gpu/drm/nouveau/nouveau_gem.c | 4 +-- drivers/md/bcache/util.h | 12 ++------ drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h | 3 -- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 29 +++---------------- drivers/net/ethernet/chelsio/cxgb3/l2t.c | 8 +----- drivers/net/ethernet/chelsio/cxgb3/l2t.h | 1 - drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c | 12 ++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 -- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 10 +++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 8 +++--- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 31 ++++---------------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 14 ++++----- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sched.c | 12 ++++---- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 9 ++---- drivers/net/ethernet/mellanox/mlx4/mr.c | 9 ++---- drivers/nvdimm/dimm_devs.c | 5 +--- .../staging/lustre/lnet/libcfs/linux/linux-mem.c | 11 +------- drivers/xen/evtchn.c | 14 +-------- fs/btrfs/ctree.c | 9 ++---- fs/btrfs/ioctl.c | 9 ++---- fs/btrfs/send.c | 27 ++++++------------ fs/ceph/file.c | 9 ++---- fs/select.c | 5 +--- fs/xattr.c | 27 ++++++------------ include/linux/mlx5/driver.h | 7 +---- include/linux/mm.h | 8 ++++++ lib/iov_iter.c | 5 +--- mm/frame_vector.c | 5 +--- net/ipv4/inet_hashtables.c | 6 +--- net/ipv4/tcp_metrics.c | 5 +--- net/mpls/af_mpls.c | 5 +--- net/netfilter/x_tables.c | 21 +++----------- net/netfilter/xt_recent.c | 5 +--- net/sched/sch_choke.c | 5 +--- net/sched/sch_fq_codel.c | 26 ++++------------- net/sched/sch_hhf.c | 33 ++++++---------------- net/sched/sch_netem.c | 6 +--- net/sched/sch_sfq.c | 6 +--- security/keys/keyctl.c | 22 ++++----------- 44 files changed, 128 insertions(+), 350 deletions(-) (limited to 'fs') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d5c5c911821a..323297e55e80 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1166,10 +1166,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kmalloc_array(args->count, sizeof(uint8_t), - GFP_KERNEL | __GFP_NOWARN); - if (!keys) - keys = vmalloc(sizeof(uint8_t) * args->count); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); if (!keys) return -ENOMEM; @@ -1211,10 +1208,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kmalloc_array(args->count, sizeof(uint8_t), - GFP_KERNEL | __GFP_NOWARN); - if (!keys) - keys = vmalloc(sizeof(uint8_t) * args->count); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); if (!keys) return -ENOMEM; diff --git a/crypto/lzo.c b/crypto/lzo.c index 168df784da84..218567d717d6 100644 --- a/crypto/lzo.c +++ b/crypto/lzo.c @@ -32,9 +32,7 @@ static void *lzo_alloc_ctx(struct crypto_scomp *tfm) { void *ctx; - ctx = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN); - if (!ctx) - ctx = vmalloc(LZO1X_MEM_COMPRESS); + ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 7207e5fc9d3d..2c462beee551 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -513,7 +513,7 @@ retry: if (i < erst_record_id_cache.len) goto retry; if (erst_record_id_cache.len >= erst_record_id_cache.size) { - int new_size, alloc_size; + int new_size; u64 *new_entries; new_size = erst_record_id_cache.size * 2; @@ -524,11 +524,7 @@ retry: pr_warn(FW_WARN "too many record IDs!\n"); return 0; } - alloc_size = new_size * sizeof(entries[0]); - if (alloc_size < PAGE_SIZE) - new_entries = kmalloc(alloc_size, GFP_KERNEL); - else - new_entries = vmalloc(alloc_size); + new_entries = kvmalloc(new_size * sizeof(entries[0]), GFP_KERNEL); if (!new_entries) return -ENOMEM; memcpy(new_entries, entries, diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index f002fa5d1887..bdf418cac8ef 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -88,13 +88,7 @@ static int agp_get_key(void) void agp_alloc_page_array(size_t size, struct agp_memory *mem) { - mem->pages = NULL; - - if (size <= 2*PAGE_SIZE) - mem->pages = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (mem->pages == NULL) { - mem->pages = vmalloc(size); - } + mem->pages = kvmalloc(size, GFP_KERNEL); } EXPORT_SYMBOL(agp_alloc_page_array); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index ca5397beb357..2170534101ca 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -568,9 +568,7 @@ u_memcpya(uint64_t user, unsigned nmemb, unsigned size) size *= nmemb; - mem = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!mem) - mem = vmalloc(size); + mem = kvmalloc(size, GFP_KERNEL); if (!mem) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 5d13930f0f22..cb8d2ccbb6c6 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -43,11 +43,7 @@ struct closure; (heap)->used = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ - (heap)->data = NULL; \ - if (_bytes < KMALLOC_MAX_SIZE) \ - (heap)->data = kmalloc(_bytes, (gfp)); \ - if ((!(heap)->data) && ((gfp) & GFP_KERNEL)) \ - (heap)->data = vmalloc(_bytes); \ + (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ (heap)->data; \ }) @@ -136,12 +132,8 @@ do { \ \ (fifo)->mask = _allocated_size - 1; \ (fifo)->front = (fifo)->back = 0; \ - (fifo)->data = NULL; \ \ - if (_bytes < KMALLOC_MAX_SIZE) \ - (fifo)->data = kmalloc(_bytes, (gfp)); \ - if ((!(fifo)->data) && ((gfp) & GFP_KERNEL)) \ - (fifo)->data = vmalloc(_bytes); \ + (fifo)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ (fifo)->data; \ }) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h index 920d918ed193..f04e81f33795 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h @@ -41,9 +41,6 @@ #define VALIDATE_TID 1 -void *cxgb_alloc_mem(unsigned long size); -void cxgb_free_mem(void *addr); - /* * Map an ATID or STID to their entries in the corresponding TID tables. */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 76684dcb874c..fa81445e334c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -1151,27 +1151,6 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new, l2t_release(tdev, e); } -/* - * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc. - * The allocated memory is cleared. - */ -void *cxgb_alloc_mem(unsigned long size) -{ - void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - - if (!p) - p = vzalloc(size); - return p; -} - -/* - * Free memory allocated through t3_alloc_mem(). - */ -void cxgb_free_mem(void *addr) -{ - kvfree(addr); -} - /* * Allocate and initialize the TID tables. Returns 0 on success. */ @@ -1182,7 +1161,7 @@ static int init_tid_tabs(struct tid_info *t, unsigned int ntids, unsigned long size = ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab); - t->tid_tab = cxgb_alloc_mem(size); + t->tid_tab = kvzalloc(size, GFP_KERNEL); if (!t->tid_tab) return -ENOMEM; @@ -1218,7 +1197,7 @@ static int init_tid_tabs(struct tid_info *t, unsigned int ntids, static void free_tid_maps(struct tid_info *t) { - cxgb_free_mem(t->tid_tab); + kvfree(t->tid_tab); } static inline void add_adapter(struct adapter *adap) @@ -1293,7 +1272,7 @@ int cxgb3_offload_activate(struct adapter *adapter) return 0; out_free_l2t: - t3_free_l2t(l2td); + kvfree(l2td); out_free: kfree(t); return err; @@ -1302,7 +1281,7 @@ out_free: static void clean_l2_data(struct rcu_head *head) { struct l2t_data *d = container_of(head, struct l2t_data, rcu_head); - t3_free_l2t(d); + kvfree(d); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c index 52063587e1e9..26264125865f 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c @@ -444,7 +444,7 @@ struct l2t_data *t3_init_l2t(unsigned int l2t_capacity) struct l2t_data *d; int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry); - d = cxgb_alloc_mem(size); + d = kvzalloc(size, GFP_KERNEL); if (!d) return NULL; @@ -462,9 +462,3 @@ struct l2t_data *t3_init_l2t(unsigned int l2t_capacity) } return d; } - -void t3_free_l2t(struct l2t_data *d) -{ - cxgb_free_mem(d); -} - diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h index 8cffcdfd5678..c2fd323c4078 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h @@ -115,7 +115,6 @@ int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, struct l2t_entry *e); void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e); struct l2t_data *t3_init_l2t(unsigned int l2t_capacity); -void t3_free_l2t(struct l2t_data *d); int cxgb3_ofld_send(struct t3cdev *dev, struct sk_buff *skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 7ad43af6bde1..3103ef9b561d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -290,8 +290,8 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start, if (clipt_size < CLIPT_MIN_HASH_BUCKETS) return NULL; - ctbl = t4_alloc_mem(sizeof(*ctbl) + - clipt_size*sizeof(struct list_head)); + ctbl = kvzalloc(sizeof(*ctbl) + + clipt_size*sizeof(struct list_head), GFP_KERNEL); if (!ctbl) return NULL; @@ -305,9 +305,9 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start, for (i = 0; i < ctbl->clipt_size; ++i) INIT_LIST_HEAD(&ctbl->hash_list[i]); - cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry)); + cl_list = kvzalloc(clipt_size*sizeof(struct clip_entry), GFP_KERNEL); if (!cl_list) { - t4_free_mem(ctbl); + kvfree(ctbl); return NULL; } ctbl->cl_list = (void *)cl_list; @@ -326,8 +326,8 @@ void t4_cleanup_clip_tbl(struct adapter *adap) if (ctbl) { if (ctbl->cl_list) - t4_free_mem(ctbl->cl_list); - t4_free_mem(ctbl); + kvfree(ctbl->cl_list); + kvfree(ctbl); } } EXPORT_SYMBOL(t4_cleanup_clip_tbl); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 163543b1ea0b..1d2be2dd19dd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1184,8 +1184,6 @@ extern const char cxgb4_driver_version[]; void t4_os_portmod_changed(const struct adapter *adap, int port_id); void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat); -void *t4_alloc_mem(size_t size); - void t4_free_sge_resources(struct adapter *adap); void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q); irq_handler_t t4_intr_handler(struct adapter *adap); @@ -1557,7 +1555,6 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize); void t4_sge_decode_idma_state(struct adapter *adapter, int state); -void t4_free_mem(void *addr); void t4_idma_monitor_init(struct adapter *adapter, struct sge_idma_monitor_state *idma); void t4_idma_monitor(struct adapter *adapter, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index f6e739da7bb7..1fa34b009891 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2634,7 +2634,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, if (count > avail - pos) count = avail - pos; - data = t4_alloc_mem(count); + data = kvzalloc(count, GFP_KERNEL); if (!data) return -ENOMEM; @@ -2642,12 +2642,12 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, ret = t4_memory_rw(adap, 0, mem, pos, count, data, T4_MEMORY_READ); spin_unlock(&adap->win0_lock); if (ret) { - t4_free_mem(data); + kvfree(data); return ret; } ret = copy_to_user(buf, data, count); - t4_free_mem(data); + kvfree(data); if (ret) return -EFAULT; @@ -2753,7 +2753,7 @@ static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, adap->sge.egr_sz, adap->sge.blocked_fl); len += sprintf(buf + len, "\n"); size = simple_read_from_buffer(ubuf, count, ppos, buf, len); - t4_free_mem(buf); + kvfree(buf); return size; } @@ -2773,7 +2773,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, return err; bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); - t4_free_mem(t); + kvfree(t); return count; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 02f80febeb91..0ba7866c8259 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -969,7 +969,7 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, { int i, err = 0; struct adapter *adapter = netdev2adap(dev); - u8 *buf = t4_alloc_mem(EEPROMSIZE); + u8 *buf = kvzalloc(EEPROMSIZE, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -980,7 +980,7 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, if (!err) memcpy(data, buf + e->offset, e->len); - t4_free_mem(buf); + kvfree(buf); return err; } @@ -1009,7 +1009,7 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) { /* RMW possibly needed for first or last words. */ - buf = t4_alloc_mem(aligned_len); + buf = kvzalloc(aligned_len, GFP_KERNEL); if (!buf) return -ENOMEM; err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf); @@ -1037,7 +1037,7 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, err = t4_seeprom_wp(adapter, true); out: if (buf != data) - t4_free_mem(buf); + kvfree(buf); return err; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c12c4a3b82b5..38a5c6764bb5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -880,27 +880,6 @@ freeout: return err; } -/* - * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc. - * The allocated memory is cleared. - */ -void *t4_alloc_mem(size_t size) -{ - void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - - if (!p) - p = vzalloc(size); - return p; -} - -/* - * Free memory allocated through alloc_mem(). - */ -void t4_free_mem(void *addr) -{ - kvfree(addr); -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -1299,7 +1278,7 @@ static int tid_init(struct tid_info *t) max_ftids * sizeof(*t->ftid_tab) + ftid_bmap_size * sizeof(long); - t->tid_tab = t4_alloc_mem(size); + t->tid_tab = kvzalloc(size, GFP_KERNEL); if (!t->tid_tab) return -ENOMEM; @@ -3445,7 +3424,7 @@ static int adap_init0(struct adapter *adap) /* allocate memory to read the header of the firmware on the * card */ - card_fw = t4_alloc_mem(sizeof(*card_fw)); + card_fw = kvzalloc(sizeof(*card_fw), GFP_KERNEL); /* Get FW from from /lib/firmware/ */ ret = request_firmware(&fw, fw_info->fw_mod_name, @@ -3465,7 +3444,7 @@ static int adap_init0(struct adapter *adap) /* Cleaning up */ release_firmware(fw); - t4_free_mem(card_fw); + kvfree(card_fw); if (ret < 0) goto bye; @@ -4470,9 +4449,9 @@ static void free_some_resources(struct adapter *adapter) { unsigned int i; - t4_free_mem(adapter->l2t); + kvfree(adapter->l2t); t4_cleanup_sched(adapter); - t4_free_mem(adapter->tids.tid_tab); + kvfree(adapter->tids.tid_tab); cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index a1b19422b339..ef06ce8247ab 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -432,9 +432,9 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) for (i = 0; i < t->size; i++) { struct cxgb4_link *link = &t->table[i]; - t4_free_mem(link->tid_map); + kvfree(link->tid_map); } - t4_free_mem(adap->tc_u32); + kvfree(adap->tc_u32); } struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) @@ -446,8 +446,8 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) if (!max_tids) return NULL; - t = t4_alloc_mem(sizeof(*t) + - (max_tids * sizeof(struct cxgb4_link))); + t = kvzalloc(sizeof(*t) + + (max_tids * sizeof(struct cxgb4_link)), GFP_KERNEL); if (!t) return NULL; @@ -458,7 +458,7 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) unsigned int bmap_size; bmap_size = BITS_TO_LONGS(max_tids); - link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + link->tid_map = kvzalloc(sizeof(unsigned long) * bmap_size, GFP_KERNEL); if (!link->tid_map) goto out_no_mem; bitmap_zero(link->tid_map, max_tids); @@ -471,11 +471,11 @@ out_no_mem: struct cxgb4_link *link = &t->table[i]; if (link->tid_map) - t4_free_mem(link->tid_map); + kvfree(link->tid_map); } if (t) - t4_free_mem(t); + kvfree(t); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 7c8c5b9a3c22..6f3692db29af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -646,7 +646,7 @@ struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end) if (l2t_size < L2T_MIN_HASH_BUCKETS) return NULL; - d = t4_alloc_mem(sizeof(*d) + l2t_size * sizeof(struct l2t_entry)); + d = kvzalloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry), GFP_KERNEL); if (!d) return NULL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index c9026352a842..02acff741f11 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -177,7 +177,7 @@ static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p) } list_del(&qe->list); - t4_free_mem(qe); + kvfree(qe); if (atomic_dec_and_test(&e->refcnt)) { e->state = SCHED_STATE_UNUSED; memset(&e->info, 0, sizeof(e->info)); @@ -201,7 +201,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) if (p->queue < 0 || p->queue >= pi->nqsets) return -ERANGE; - qe = t4_alloc_mem(sizeof(struct sched_queue_entry)); + qe = kvzalloc(sizeof(struct sched_queue_entry), GFP_KERNEL); if (!qe) return -ENOMEM; @@ -211,7 +211,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) /* Unbind queue from any existing class */ err = t4_sched_queue_unbind(pi, p); if (err) { - t4_free_mem(qe); + kvfree(qe); goto out; } @@ -224,7 +224,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) spin_lock(&e->lock); err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true); if (err) { - t4_free_mem(qe); + kvfree(qe); spin_unlock(&e->lock); goto out; } @@ -512,7 +512,7 @@ struct sched_table *t4_init_sched(unsigned int sched_size) struct sched_table *s; unsigned int i; - s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class)); + s = kvzalloc(sizeof(*s) + sched_size * sizeof(struct sched_class), GFP_KERNEL); if (!s) return NULL; @@ -548,6 +548,6 @@ void t4_cleanup_sched(struct adapter *adap) t4_sched_class_free(pi, e); write_unlock(&s->rw_lock); } - t4_free_mem(s); + kvfree(s); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ba89bc43d74..6ffd1849a604 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -70,13 +70,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); - ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node); + ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node); if (!ring->tx_info) { - ring->tx_info = vmalloc(tmp); - if (!ring->tx_info) { - err = -ENOMEM; - goto err_ring; - } + err = -ENOMEM; + goto err_ring; } en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index db65f72879e9..ce852ca22a96 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -115,12 +115,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN); - if (!buddy->bits[i]) { - buddy->bits[i] = vzalloc(s * sizeof(long)); - if (!buddy->bits[i]) - goto err_out_free; - } + buddy->bits[i] = kvmalloc_array(s, sizeof(long), GFP_KERNEL | __GFP_ZERO); + if (!buddy->bits[i]) + goto err_out_free; } set_bit(0, buddy->bits[buddy->max_order]); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index fac1e9fbd11d..9852a3355509 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -106,10 +106,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) return -ENXIO; } - ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); - if (!ndd->data) - ndd->data = vmalloc(ndd->nsarea.config_size); - + ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL); if (!ndd->data) return -ENOMEM; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c index a6a76a681ea9..8f638267e704 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c @@ -45,15 +45,6 @@ EXPORT_SYMBOL(libcfs_kvzalloc); void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size, gfp_t flags) { - void *ret; - - ret = kzalloc_node(size, flags | __GFP_NOWARN, - cfs_cpt_spread_node(cptab, cpt)); - if (!ret) { - WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH))); - ret = vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); - } - - return ret; + return kvzalloc_node(size, flags, cfs_cpt_spread_node(cptab, cpt)); } EXPORT_SYMBOL(libcfs_kvzalloc_cpt); diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 6890897a6f30..10f1ef582659 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -87,18 +87,6 @@ struct user_evtchn { bool enabled; }; -static evtchn_port_t *evtchn_alloc_ring(unsigned int size) -{ - evtchn_port_t *ring; - size_t s = size * sizeof(*ring); - - ring = kmalloc(s, GFP_KERNEL); - if (!ring) - ring = vmalloc(s); - - return ring; -} - static void evtchn_free_ring(evtchn_port_t *ring) { kvfree(ring); @@ -334,7 +322,7 @@ static int evtchn_resize_ring(struct per_user_data *u) else new_size = 2 * u->ring_size; - new_ring = evtchn_alloc_ring(new_size); + new_ring = kvmalloc(new_size * sizeof(*new_ring), GFP_KERNEL); if (!new_ring) return -ENOMEM; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7dc8844037e0..1c3b6c54d5ee 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5392,13 +5392,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN); + tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); if (!tmp_buf) { - tmp_buf = vmalloc(fs_info->nodesize); - if (!tmp_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } left_path->search_commit_root = 1; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dabfc7ac48a6..922a66fce401 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3539,12 +3539,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u64 last_dest_end = destoff; ret = -ENOMEM; - buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN); - if (!buf) { - buf = vmalloc(fs_info->nodesize); - if (!buf) - return ret; - } + buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); + if (!buf) + return ret; path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a60d5bfb8a49..3f645cd67b54 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6360,22 +6360,16 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) sctx->clone_roots_cnt = arg->clone_sources_count; sctx->send_max_size = BTRFS_SEND_BUF_SIZE; - sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN); + sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL); if (!sctx->send_buf) { - sctx->send_buf = vmalloc(sctx->send_max_size); - if (!sctx->send_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } - sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN); + sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); if (!sctx->read_buf) { - sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); - if (!sctx->read_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } sctx->pending_dir_moves = RB_ROOT; @@ -6396,13 +6390,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); if (arg->clone_sources_count) { - clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); + clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL); if (!clone_sources_tmp) { - clone_sources_tmp = vmalloc(alloc_size); - if (!clone_sources_tmp) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } ret = copy_from_user(clone_sources_tmp, arg->clone_sources, diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 26cc95421cca..18c045e2ead6 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -74,12 +74,9 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, align = (unsigned long)(it->iov->iov_base + it->iov_offset) & (PAGE_SIZE - 1); npages = calc_pages_for(align, nbytes); - pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL); - if (!pages) { - pages = vmalloc(sizeof(*pages) * npages); - if (!pages) - return ERR_PTR(-ENOMEM); - } + pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); for (idx = 0; idx < npages; ) { size_t start; diff --git a/fs/select.c b/fs/select.c index bd4b2ccfd346..d6c652a31e99 100644 --- a/fs/select.c +++ b/fs/select.c @@ -633,10 +633,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, goto out_nofds; alloc_size = 6 * size; - bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN); - if (!bits && alloc_size > PAGE_SIZE) - bits = vmalloc(alloc_size); - + bits = kvmalloc(alloc_size, GFP_KERNEL); if (!bits) goto out_nofds; } diff --git a/fs/xattr.c b/fs/xattr.c index 94f49a082dd2..464c94bf65f9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -431,12 +431,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, if (size) { if (size > XATTR_SIZE_MAX) return -E2BIG; - kvalue = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!kvalue) { - kvalue = vmalloc(size); - if (!kvalue) - return -ENOMEM; - } + kvalue = kvmalloc(size, GFP_KERNEL); + if (!kvalue) + return -ENOMEM; if (copy_from_user(kvalue, value, size)) { error = -EFAULT; goto out; @@ -528,12 +525,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (size) { if (size > XATTR_SIZE_MAX) size = XATTR_SIZE_MAX; - kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!kvalue) { - kvalue = vzalloc(size); - if (!kvalue) - return -ENOMEM; - } + kvalue = kvzalloc(size, GFP_KERNEL); + if (!kvalue) + return -ENOMEM; } error = vfs_getxattr(d, kname, kvalue, size); @@ -611,12 +605,9 @@ listxattr(struct dentry *d, char __user *list, size_t size) if (size) { if (size > XATTR_LIST_MAX) size = XATTR_LIST_MAX; - klist = kmalloc(size, __GFP_NOWARN | GFP_KERNEL); - if (!klist) { - klist = vmalloc(size); - if (!klist) - return -ENOMEM; - } + klist = kvmalloc(size, GFP_KERNEL); + if (!klist) + return -ENOMEM; } error = vfs_listxattr(d, klist, size); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fece51dcf13..18fc65b84b79 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -892,12 +892,7 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev) static inline void *mlx5_vzalloc(unsigned long size) { - void *rtn; - - rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!rtn) - rtn = vzalloc(size); - return rtn; + return kvzalloc(size, GFP_KERNEL); } static inline u32 mlx5_base_mkey(const u32 key) diff --git a/include/linux/mm.h b/include/linux/mm.h index 08e2849d27ca..7cb17c6b97de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -532,6 +532,14 @@ static inline void *kvzalloc(size_t size, gfp_t flags) return kvmalloc(size, flags | __GFP_ZERO); } +static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + + return kvmalloc(n * size, flags); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4952311422c1..ae82d9cea553 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1028,10 +1028,7 @@ EXPORT_SYMBOL(iov_iter_get_pages); static struct page **get_pages_array(size_t n) { - struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); - if (!p) - p = vmalloc(n * sizeof(struct page *)); - return p; + return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); } static ssize_t pipe_get_pages_alloc(struct iov_iter *i, diff --git a/mm/frame_vector.c b/mm/frame_vector.c index db77dcb38afd..72ebec18629c 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -200,10 +200,7 @@ struct frame_vector *frame_vector_create(unsigned int nr_frames) * Avoid higher order allocations, use vmalloc instead. It should * be rare anyway. */ - if (size <= PAGE_SIZE) - vec = kmalloc(size, GFP_KERNEL); - else - vec = vmalloc(size); + vec = kvmalloc(size, GFP_KERNEL); if (!vec) return NULL; vec->nr_allocated = nr_frames; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8bea74298173..e9a59d2d91d4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -678,11 +678,7 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); - hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, - GFP_KERNEL | __GFP_NOWARN); - if (!hashinfo->ehash_locks) - hashinfo->ehash_locks = vmalloc(nblocks * locksz); - + hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); if (!hashinfo->ehash_locks) return -ENOMEM; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9d0d4f39e42b..653bbd67e3a3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1011,10 +1011,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) tcp_metrics_hash_log = order_base_2(slots); size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!tcp_metrics_hash) - tcp_metrics_hash = vzalloc(size); - + tcp_metrics_hash = kvzalloc(size, GFP_KERNEL); if (!tcp_metrics_hash) return -ENOMEM; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 088e2b459d0f..257ec66009da 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2005,10 +2005,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) unsigned index; if (size) { - labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (!labels) - labels = vzalloc(size); - + labels = kvzalloc(size, GFP_KERNEL); if (!labels) goto nolabels; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f134d384852f..3d0584665b5d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -763,17 +763,8 @@ EXPORT_SYMBOL(xt_check_entry_offsets); */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { - unsigned int *off; + return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); - off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN); - - if (off) - return off; - - if (size < (SIZE_MAX / sizeof(unsigned int))) - off = vmalloc(size * sizeof(unsigned int)); - - return off; } EXPORT_SYMBOL(xt_alloc_entry_offsets); @@ -1116,7 +1107,7 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) - i->jumpstack = vzalloc(size); + i->jumpstack = kvzalloc(size, GFP_KERNEL); else i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) @@ -1138,12 +1129,8 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) */ size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { - if (size > PAGE_SIZE) - i->jumpstack[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); - else - i->jumpstack[cpu] = kmalloc_node(size, - GFP_KERNEL, cpu_to_node(cpu)); + i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); if (i->jumpstack[cpu] == NULL) /* * Freeing will be done later on by the callers. The diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 37d581a31cff..3f6c4fa78bdb 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -388,10 +388,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, } sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - if (sz <= PAGE_SIZE) - t = kzalloc(sz, GFP_KERNEL); - else - t = vzalloc(sz); + t = kvzalloc(sz, GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index d00f4c7c2f3a..b30a2c70bd48 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -376,10 +376,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) if (mask != q->tab_mask) { struct sk_buff **ntab; - ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), - GFP_KERNEL | __GFP_NOWARN); - if (!ntab) - ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO); if (!ntab) return -ENOMEM; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 18bbb5476c83..9201abce928c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -446,27 +446,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static void *fq_codel_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - return ptr; -} - -static void fq_codel_free(void *addr) -{ - kvfree(addr); -} - static void fq_codel_destroy(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(&q->filter_list); - fq_codel_free(q->backlogs); - fq_codel_free(q->flows); + kvfree(q->backlogs); + kvfree(q->flows); } static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) @@ -493,13 +479,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) } if (!q->flows) { - q->flows = fq_codel_zalloc(q->flows_cnt * - sizeof(struct fq_codel_flow)); + q->flows = kvzalloc(q->flows_cnt * + sizeof(struct fq_codel_flow), GFP_KERNEL); if (!q->flows) return -ENOMEM; - q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32)); + q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); if (!q->backlogs) { - fq_codel_free(q->flows); + kvfree(q->flows); return -ENOMEM; } for (i = 0; i < q->flows_cnt; i++) { diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index c19d346e6c5a..51d3ba682af9 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -467,29 +467,14 @@ static void hhf_reset(struct Qdisc *sch) rtnl_kfree_skbs(skb, skb); } -static void *hhf_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - - return ptr; -} - -static void hhf_free(void *addr) -{ - kvfree(addr); -} - static void hhf_destroy(struct Qdisc *sch) { int i; struct hhf_sched_data *q = qdisc_priv(sch); for (i = 0; i < HHF_ARRAYS_CNT; i++) { - hhf_free(q->hhf_arrays[i]); - hhf_free(q->hhf_valid_bits[i]); + kvfree(q->hhf_arrays[i]); + kvfree(q->hhf_valid_bits[i]); } for (i = 0; i < HH_FLOWS_CNT; i++) { @@ -503,7 +488,7 @@ static void hhf_destroy(struct Qdisc *sch) kfree(flow); } } - hhf_free(q->hh_flows); + kvfree(q->hh_flows); } static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { @@ -609,8 +594,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) if (!q->hh_flows) { /* Initialize heavy-hitter flow table. */ - q->hh_flows = hhf_zalloc(HH_FLOWS_CNT * - sizeof(struct list_head)); + q->hh_flows = kvzalloc(HH_FLOWS_CNT * + sizeof(struct list_head), GFP_KERNEL); if (!q->hh_flows) return -ENOMEM; for (i = 0; i < HH_FLOWS_CNT; i++) @@ -624,8 +609,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * - sizeof(u32)); + q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN * + sizeof(u32), GFP_KERNEL); if (!q->hhf_arrays[i]) { /* Note: hhf_destroy() will be called * by our caller. @@ -637,8 +622,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize valid bits of heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / - BITS_PER_BYTE); + q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / + BITS_PER_BYTE, GFP_KERNEL); if (!q->hhf_valid_bits[i]) { /* Note: hhf_destroy() will be called * by our caller. diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f0ce4780f395..1b3dd6190e93 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -702,15 +702,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) spinlock_t *root_lock; struct disttable *d; int i; - size_t s; if (n > NETEM_DIST_MAX) return -EINVAL; - s = sizeof(struct disttable) + n * sizeof(s16); - d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN); - if (!d) - d = vmalloc(s); + d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL); if (!d) return -ENOMEM; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b00e02c139de..332d94be6e1c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -685,11 +685,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static void *sfq_alloc(size_t sz) { - void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vmalloc(sz); - return ptr; + return kvmalloc(sz, GFP_KERNEL); } static void sfq_free(void *addr) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 82a9e1851108..447a7d5cee0f 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -101,14 +101,9 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, if (_payload) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL | __GFP_NOWARN); - if (!payload) { - if (plen <= PAGE_SIZE) - goto error2; - payload = vmalloc(plen); - if (!payload) - goto error2; - } + payload = kvmalloc(plen, GFP_KERNEL); + if (!payload) + goto error2; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) @@ -1071,14 +1066,9 @@ long keyctl_instantiate_key_common(key_serial_t id, if (from) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL); - if (!payload) { - if (plen <= PAGE_SIZE) - goto error; - payload = vmalloc(plen); - if (!payload) - goto error; - } + payload = kvmalloc(plen, GFP_KERNEL); + if (!payload) + goto error; ret = -EFAULT; if (!copy_from_iter_full(payload, plen, from)) -- cgit v1.2.3 From 19809c2da28aee5860ad9a2eff760730a0710df0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:44 -0700 Subject: mm, vmalloc: use __GFP_HIGHMEM implicitly __vmalloc* allows users to provide gfp flags for the underlying allocation. This API is quite popular $ git grep "=[[:space:]]__vmalloc\|return[[:space:]]*__vmalloc" | wc -l 77 The only problem is that many people are not aware that they really want to give __GFP_HIGHMEM along with other flags because there is really no reason to consume precious lowmemory on CONFIG_HIGHMEM systems for pages which are mapped to the kernel vmalloc space. About half of users don't use this flag, though. This signals that we make the API unnecessarily too complex. This patch simply uses __GFP_HIGHMEM implicitly when allocating pages to be mapped to the vmalloc space. Current users which add __GFP_HIGHMEM are simplified and drop the flag. Link: http://lkml.kernel.org/r/20170307141020.29107-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Matthew Wilcox Cc: Al Viro Cc: Vlastimil Babka Cc: David Rientjes Cc: Cristopher Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/kernel/module.c | 2 +- arch/x86/kernel/module.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- fs/btrfs/free-space-tree.c | 3 +-- fs/file.c | 2 +- fs/xfs/kmem.c | 2 +- include/drm/drm_mem_util.h | 9 +++------ kernel/bpf/core.c | 9 +++------ kernel/bpf/syscall.c | 3 +-- kernel/fork.c | 2 +- kernel/groups.c | 2 +- kernel/module.c | 2 +- mm/kasan/kasan.c | 2 +- mm/nommu.c | 3 +-- mm/util.c | 2 +- mm/vmalloc.c | 14 +++++++------- net/ceph/ceph_common.c | 2 +- net/netfilter/x_tables.c | 3 +-- 20 files changed, 31 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index c66c943d9322..f1a76935a314 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -218,7 +218,7 @@ void *module_alloc(unsigned long size) * easier than trying to map the text, data, init_text and * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_HIGHMEM, + GFP_KERNEL, PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 477ae806c2fa..f67bd3205df7 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -85,7 +85,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), - MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index dece26f119d4..a804a4107fbc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -409,7 +409,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN); if (!new_pages) { new_pages = __vmalloc(bytes, - GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO, + GFP_NOIO | __GFP_ZERO, PAGE_KERNEL); if (!new_pages) return NULL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index d019b5e311cc..2d955d7d7b6d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -161,8 +161,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) file_size += sizeof(*iter.hdr) * n_obj; /* Allocate the file in vmalloc memory, it's likely to be big */ - iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_HIGHMEM | - __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); + iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + PAGE_KERNEL); if (!iter.start) { dev_warn(gpu->dev, "failed to allocate devcoredump file\n"); return; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index c92c31b23e54..5db11a405129 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -406,7 +406,7 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, if (gfp_mask & __GFP_NORETRY) noio_flag = memalloc_noio_save(); - ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL); + ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); if (gfp_mask & __GFP_NORETRY) memalloc_noio_restore(noio_flag); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dd7fb22a955a..fc0bd8406758 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -167,8 +167,7 @@ static u8 *alloc_bitmap(u32 bitmap_size) if (mem) return mem; - return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL); } int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, diff --git a/fs/file.c b/fs/file.c index ad6f094f2eff..1c2972e3a405 100644 --- a/fs/file.c +++ b/fs/file.c @@ -42,7 +42,7 @@ static void *alloc_fdmem(size_t size) if (data != NULL) return data; } - return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); } static void __free_fdtable(struct fdtable *fdt) diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 780fc8986dab..393b6849aeb3 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) nofs_flag = memalloc_nofs_save(); lflags = kmem_flags_convert(flags); - ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL); if (flags & KM_NOFS) memalloc_nofs_restore(nofs_flag); diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h index 70d4e221a3ad..d0f6cf2e5324 100644 --- a/include/drm/drm_mem_util.h +++ b/include/drm/drm_mem_util.h @@ -37,8 +37,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kcalloc(nmemb, size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + return vzalloc(size * nmemb); } /* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */ @@ -50,8 +49,7 @@ static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kmalloc(nmemb * size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); + return vmalloc(size * nmemb); } static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) @@ -69,8 +67,7 @@ static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) return ptr; } - return __vmalloc(size * nmemb, - gfp | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size * nmemb, gfp, PAGE_KERNEL); } static __inline void drm_free_large(void *ptr) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6f81e0f5a0fa..dedf367f59bb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -76,8 +76,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; struct bpf_prog *fp; @@ -107,8 +106,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_alloc); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; u32 pages, delta; int ret; @@ -655,8 +653,7 @@ out: static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 13642c73dca0..fd2411fd6914 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -67,8 +67,7 @@ void *bpf_map_area_alloc(size_t size) return area; } - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, - PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL); } void bpf_map_area_free(void *area) diff --git a/kernel/fork.c b/kernel/fork.c index 55e325f4b457..08ba696aa561 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -221,7 +221,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, VMALLOC_START, VMALLOC_END, - THREADINFO_GFP | __GFP_HIGHMEM, + THREADINFO_GFP, PAGE_KERNEL, 0, node, __builtin_return_address(0)); diff --git a/kernel/groups.c b/kernel/groups.c index 8dd7a61b7115..d09727692a2a 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -18,7 +18,7 @@ struct group_info *groups_alloc(int gidsetsize) len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize; gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY); if (!gi) - gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL); + gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); if (!gi) return NULL; diff --git a/kernel/module.c b/kernel/module.c index f37308b733d8..2b316b954828 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2864,7 +2864,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, /* Suck in entire file: we'll want most of it. */ info->hdr = __vmalloc(info->len, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL); + GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL); if (!info->hdr) return -ENOMEM; diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 9348d27088c1..b10da59cf765 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -691,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size) ret = __vmalloc_node_range(shadow_size, 1, shadow_start, shadow_start + shadow_size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/mm/nommu.c b/mm/nommu.c index a80411d258fc..fc184f597d59 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -246,8 +246,7 @@ void *vmalloc_user(unsigned long size) { void *ret; - ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); if (ret) { struct vm_area_struct *vma; diff --git a/mm/util.c b/mm/util.c index f4e590b2c0da..718154debc87 100644 --- a/mm/util.c +++ b/mm/util.c @@ -382,7 +382,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (ret || size <= PAGE_SIZE) return ret; - return __vmalloc_node_flags(size, node, flags | __GFP_HIGHMEM); + return __vmalloc_node_flags(size, node, flags); } EXPORT_SYMBOL(kvmalloc_node); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 717b1e8b942c..1dda6d8a200a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1655,7 +1655,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page **pages; unsigned int nr_pages, array_size, i; const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; + const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN; nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__vmalloc); void *vmalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM); + GFP_KERNEL); } EXPORT_SYMBOL(vmalloc); @@ -1835,7 +1835,7 @@ EXPORT_SYMBOL(vmalloc); void *vzalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc); @@ -1852,7 +1852,7 @@ void *vmalloc_user(unsigned long size) void *ret; ret = __vmalloc_node(size, SHMLBA, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); if (ret) { @@ -1876,7 +1876,7 @@ EXPORT_SYMBOL(vmalloc_user); */ void *vmalloc_node(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node); @@ -1896,7 +1896,7 @@ EXPORT_SYMBOL(vmalloc_node); void *vzalloc_node(unsigned long size, int node) { return __vmalloc_node_flags(size, node, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc_node); @@ -1918,7 +1918,7 @@ EXPORT_SYMBOL(vzalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 108533859a53..4eb773ccce11 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -187,7 +187,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return ptr; } - return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, flags, PAGE_KERNEL); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 3d0584665b5d..8876b7da6884 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -998,8 +998,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!info) { - info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | - __GFP_NORETRY | __GFP_HIGHMEM, + info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); if (!info) return NULL; -- cgit v1.2.3 From 6e7c2b4dd36d8336f876c66a31d9b84616f3b2c4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 8 May 2017 15:57:53 -0700 Subject: scripts/spelling.txt: add "intialise(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: intialisation||initialisation intialised||initialised intialise||initialise This commit does not intend to change the British spelling itself. Link: http://lkml.kernel.org/r/1481573103-11329-18-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- certs/blacklist.c | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 8 ++++---- drivers/video/fbdev/intelfb/intelfbdrv.c | 2 +- fs/inode.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- scripts/spelling.txt | 3 +++ 6 files changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/certs/blacklist.c b/certs/blacklist.c index 3eddce0e307a..3a507b9e2568 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -140,7 +140,7 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type) EXPORT_SYMBOL_GPL(is_hash_blacklisted); /* - * Intialise the blacklist + * Initialise the blacklist */ static int __init blacklist_init(void) { diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index d54490d3f7ad..1e594351a60f 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -387,7 +387,7 @@ static void sxgbe_free_rx_buffers(struct net_device *dev, /** * init_tx_ring - init the TX descriptor ring * @dev: net device structure - * @tx_ring: ring to be intialised + * @tx_ring: ring to be initialised * @tx_rsize: ring size * Description: this function initializes the DMA TX descriptor */ @@ -437,7 +437,7 @@ dmamem_err: /** * free_rx_ring - free the RX descriptor ring * @dev: net device structure - * @rx_ring: ring to be intialised + * @rx_ring: ring to be initialised * @rx_rsize: ring size * Description: this function initializes the DMA RX descriptor */ @@ -453,7 +453,7 @@ static void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring, /** * init_rx_ring - init the RX descriptor ring * @dev: net device structure - * @rx_ring: ring to be intialised + * @rx_ring: ring to be initialised * @rx_rsize: ring size * Description: this function initializes the DMA RX descriptor */ @@ -539,7 +539,7 @@ err_free_dma_rx: /** * free_tx_ring - free the TX descriptor ring * @dev: net device structure - * @tx_ring: ring to be intialised + * @tx_ring: ring to be initialised * @tx_rsize: ring size * Description: this function initializes the DMA TX descriptor */ diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index ff2a5d2023e1..6b444400a86c 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -934,7 +934,7 @@ static __inline__ int var_to_refresh(const struct fb_var_screeninfo *var) } /*************************************************************** - * Various intialisation functions * + * Various initialisation functions * ***************************************************************/ static void get_initial_mode(struct intelfb_info *dinfo) diff --git a/fs/inode.c b/fs/inode.c index 131b2bcebc48..6ad1edb52045 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -119,7 +119,7 @@ static int no_open(struct inode *inode, struct file *file) } /** - * inode_init_always - perform inode structure intialisation + * inode_init_always - perform inode structure initialisation * @sb: superblock inode belongs to * @inode: inode to initialise * diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4a98762ec8b4..cd0b077deb35 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3796,7 +3796,7 @@ xlog_recover_bud_pass2( * This routine is called when an inode create format structure is found in a * committed transaction in the log. It's purpose is to initialise the inodes * being allocated on disk. This requires us to get inode cluster buffers that - * match the range to be intialised, stamped with inode templates and written + * match the range to be initialised, stamped with inode templates and written * by delayed write so that subsequent modifications will hit the cached buffer * and only need writing out at the end of recovery. */ diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 7b6d25a86202..aeca2c25de32 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -605,6 +605,9 @@ interruptted||interrupted interupted||interrupted interupt||interrupt intial||initial +intialisation||initialisation +intialised||initialised +intialise||initialise intialization||initialization intialized||initialized intialize||initialize -- cgit v1.2.3 From c718a97514e4d77c97a35734b728aaf541a0621b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 8 May 2017 15:58:59 -0700 Subject: fs: semove set but not checked AOP_FLAG_UNINTERRUPTIBLE flag Commit afddba49d18f ("fs: introduce write_begin, write_end, and perform_write aops") introduced AOP_FLAG_UNINTERRUPTIBLE flag which was checked in pagecache_write_begin(), but that check was removed by 4e02ed4b4a2f ("fs: remove prepare_write/commit_write"). Between these two commits, commit d9414774dc0c ("cifs: Convert cifs to new aops.") added a check in cifs_write_begin(), but that check was soon removed by commit a98ee8c1c707 ("[CIFS] fix regression in cifs_write_begin/cifs_write_end"). Therefore, AOP_FLAG_UNINTERRUPTIBLE flag is checked nowhere. Let's remove this flag. This patch has no functionality changes. Link: http://lkml.kernel.org/r/1489294781-53494-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Cc: Nick Piggin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfs.txt | 3 +-- fs/buffer.c | 13 +++++-------- fs/exofs/dir.c | 3 +-- fs/hfs/extent.c | 4 ++-- fs/hfsplus/extents.c | 5 ++--- fs/iomap.c | 13 +++---------- fs/namei.c | 2 +- include/linux/fs.h | 5 ++--- mm/filemap.c | 6 ------ 9 files changed, 17 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 94dd27ef4a76..f42b90687d40 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -694,8 +694,7 @@ struct address_space_operations { write_end: After a successful write_begin, and data copy, write_end must be called. len is the original len passed to write_begin, and copied - is the amount that was able to be copied (copied == len is always true - if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag). + is the amount that was able to be copied. The filesystem must take care of unlocking the page and releasing it refcount, and updating i_size. diff --git a/fs/buffer.c b/fs/buffer.c index 9196f2a270da..c3c7455efa3f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2379,8 +2379,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) goto out; err = pagecache_write_begin(NULL, mapping, size, 0, - AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, - &page, &fsdata); + AOP_FLAG_CONT_EXPAND, &page, &fsdata); if (err) goto out; @@ -2415,9 +2414,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, } len = PAGE_SIZE - zerofrom; - err = pagecache_write_begin(file, mapping, curpos, len, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + err = pagecache_write_begin(file, mapping, curpos, len, 0, + &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); @@ -2449,9 +2447,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, } len = offset - zerofrom; - err = pagecache_write_begin(file, mapping, curpos, len, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + err = pagecache_write_begin(file, mapping, curpos, len, 0, + &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 42f9a0a0c4ca..8eeb694332fe 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -405,8 +405,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de, int err; lock_page(page); - err = exofs_write_begin(NULL, page->mapping, pos, len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = exofs_write_begin(NULL, page->mapping, pos, len, 0, &page, NULL); if (err) EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n", err); diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index e33a0d36a93e..5d0182654580 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -485,8 +485,8 @@ void hfs_file_truncate(struct inode *inode) /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; - res = pagecache_write_begin(NULL, mapping, size+1, 0, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + res = pagecache_write_begin(NULL, mapping, size+1, 0, 0, + &page, &fsdata); if (!res) { res = pagecache_write_end(NULL, mapping, size+1, 0, 0, page, fsdata); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index feca524ce2a5..a3eb640b4f8f 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -545,9 +545,8 @@ void hfsplus_file_truncate(struct inode *inode) void *fsdata; loff_t size = inode->i_size; - res = pagecache_write_begin(NULL, mapping, size, 0, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + res = pagecache_write_begin(NULL, mapping, size, 0, 0, + &page, &fsdata); if (res) return; res = pagecache_write_end(NULL, mapping, size, diff --git a/fs/iomap.c b/fs/iomap.c index 1faabe09b8fd..4b10892967a5 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -158,12 +158,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ssize_t written = 0; unsigned int flags = AOP_FLAG_NOFS; - /* - * Copies from kernel address space cannot fail (NFSD is a big user). - */ - if (!iter_is_iovec(i)) - flags |= AOP_FLAG_UNINTERRUPTIBLE; - do { struct page *page; unsigned long offset; /* Offset into pagecache page */ @@ -291,8 +285,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, return PTR_ERR(rpage); status = iomap_write_begin(inode, pos, bytes, - AOP_FLAG_NOFS | AOP_FLAG_UNINTERRUPTIBLE, - &page, iomap); + AOP_FLAG_NOFS, &page, iomap); put_page(rpage); if (unlikely(status)) return status; @@ -343,8 +336,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, struct page *page; int status; - status = iomap_write_begin(inode, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE | AOP_FLAG_NOFS, &page, iomap); + status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page, + iomap); if (status) return status; diff --git a/fs/namei.c b/fs/namei.c index 9a7f8bd748d8..7286f87ce863 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4766,7 +4766,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) struct page *page; void *fsdata; int err; - unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; + unsigned int flags = 0; if (nofs) flags |= AOP_FLAG_NOFS; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d62d2c47939..249dad4e8d26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -250,9 +250,8 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; -#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ -#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ -#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct +#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct * helper code (eg buffer layer) * to clear GFP_FS from alloc */ diff --git a/mm/filemap.c b/mm/filemap.c index 681da61080bc..b7b973b47d8d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2791,12 +2791,6 @@ ssize_t generic_perform_write(struct file *file, ssize_t written = 0; unsigned int flags = 0; - /* - * Copies from kernel address space cannot fail (NFSD is a big user). - */ - if (!iter_is_iovec(i)) - flags |= AOP_FLAG_UNINTERRUPTIBLE; - do { struct page *page; unsigned long offset; /* Offset into pagecache page */ -- cgit v1.2.3 From 48fbfe50f1d5fef51bac98d105d2a28df42a1205 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:10 -0700 Subject: fs: f2fs: use ktime_get_real_seconds for sit_info times CURRENT_TIME_SEC is not y2038 safe. Replace use of CURRENT_TIME_SEC with ktime_get_real_seconds in segment timestamps used by GC algorithm including the segment mtime timestamps. Link: http://lkml.kernel.org/r/1491613030-11599-2-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Cc: Jaegeuk Kim Cc: Chao Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 13806f642ab5..87c962705550 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2573,7 +2573,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + sit_i->mounted_time = ktime_get_real_seconds(); mutex_init(&sit_i->sentry_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5e8ad4280a50..313b99040aa6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -691,8 +691,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; + time64_t now = ktime_get_real_seconds(); + + return sit_i->elapsed_time + now - sit_i->mounted_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, -- cgit v1.2.3 From e37fea58f771c2674709099a09ddafd058fef634 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:16 -0700 Subject: fs: cifs: replace CURRENT_TIME by other appropriate apis CURRENT_TIME macro is not y2038 safe on 32 bit systems. The patch replaces all the uses of CURRENT_TIME by current_time() for filesystem times, and ktime_get_* functions for authentication timestamps and timezone calculations. This is also in preparation for the patch that transitions vfs timestamps to use 64 bit time and hence make them y2038 safe. CURRENT_TIME macro will be deleted before merging the aforementioned change. The inode timestamps read from the server are assumed to have correct granularity and range. The patch also assumes that the difference between server and client times lie in the range INT_MIN..INT_MAX. This is valid because this is the difference between current times between server and client, and the largest timezone difference is in the range of one day. All cifs timestamps currently use timespec representation internally. Authentication and timezone timestamps can also be transitioned into using timespec64 when all other timestamps for cifs is transitioned to use timespec64. Link: http://lkml.kernel.org/r/1491613030-11599-4-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/cifsencrypt.c | 4 +++- fs/cifs/cifssmb.c | 10 +++++----- fs/cifs/inode.c | 28 +++++++++++++++------------- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 058ac9b36f04..68abbb0db608 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -478,6 +478,7 @@ find_timestamp(struct cifs_ses *ses) unsigned char *blobptr; unsigned char *blobend; struct ntlmssp2_name *attrptr; + struct timespec ts; if (!ses->auth_key.len || !ses->auth_key.response) return 0; @@ -502,7 +503,8 @@ find_timestamp(struct cifs_ses *ses) blobptr += attrsize; /* advance attr value */ } - return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + ktime_get_real_ts(&ts); + return cpu_to_le64(cifs_UnixTimeToNT(ts)); } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 205fd94f52fd..4c01b3f9abf0 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -478,14 +478,14 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) * this requirement. */ int val, seconds, remain, result; - struct timespec ts, utc; - utc = CURRENT_TIME; + struct timespec ts; + unsigned long utc = ktime_get_real_seconds(); ts = cnvrtDosUnixTm(rsp->SrvTime.Date, rsp->SrvTime.Time, 0); cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec)); - val = (int)(utc.tv_sec - ts.tv_sec); + (int)ts.tv_sec, (int)utc, + (int)(utc - ts.tv_sec)); + val = (int)(utc - ts.tv_sec); seconds = abs(val); result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; remain = seconds % MIN_TZ_ADJ; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b261db34103c..c3b2fa0b2ec8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -322,9 +322,9 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; fattr->cf_uid = cifs_sb->mnt_uid; fattr->cf_gid = cifs_sb->mnt_gid; - fattr->cf_atime = CURRENT_TIME; - fattr->cf_ctime = CURRENT_TIME; - fattr->cf_mtime = CURRENT_TIME; + ktime_get_real_ts(&fattr->cf_mtime); + fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran); + fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } @@ -586,9 +586,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz, + struct super_block *sb, bool adjust_tz, bool symlink) { + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); memset(fattr, 0, sizeof(*fattr)); @@ -598,8 +599,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (info->LastAccessTime) fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); - else - fattr->cf_atime = CURRENT_TIME; + else { + ktime_get_real_ts(&fattr->cf_atime); + fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran); + } fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); @@ -659,7 +662,6 @@ cifs_get_file_info(struct file *filp) FILE_ALL_INFO find_data; struct cifs_fattr fattr; struct inode *inode = file_inode(filp); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; @@ -671,7 +673,7 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, + cifs_all_info_to_fattr(&fattr, &find_data, inode->i_sb, false, false); break; case -EREMOTE: @@ -753,7 +755,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } if (!rc) { - cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, + cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz, symlink); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); @@ -1363,9 +1365,9 @@ out_reval: cifs_inode = CIFS_I(inode); cifs_inode->time = 0; /* will force revalidate to get info when needed */ - inode->i_ctime = current_fs_time(sb); + inode->i_ctime = current_time(inode); } - dir->i_ctime = dir->i_mtime = current_fs_time(sb); + dir->i_ctime = dir->i_mtime = current_time(dir); cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ unlink_out: @@ -1633,7 +1635,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifsInode->time = 0; d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime = - current_fs_time(inode->i_sb); + current_time(inode); rmdir_exit: kfree(full_path); @@ -1806,7 +1808,7 @@ unlink_target: CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = - target_dir->i_mtime = current_fs_time(source_dir->i_sb); + target_dir->i_mtime = current_time(source_dir); cifs_rename_exit: kfree(info_buf_source); -- cgit v1.2.3 From 1134e091006a61d7ea4c33748b598972d1edc5c4 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:19 -0700 Subject: fs: ceph: CURRENT_TIME with ktime_get_real_ts() CURRENT_TIME is not y2038 safe. The macro will be deleted and all the references to it will be replaced by ktime_get_* apis. struct timespec is also not y2038 safe. Retain timespec for timestamp representation here as ceph uses it internally everywhere. These references will be changed to use struct timespec64 in a separate patch. The current_fs_time() api is being changed to use vfs struct inode* as an argument instead of struct super_block*. Set the new mds client request r_stamp field using ktime_get_real_ts() instead of using current_fs_time(). Also, since r_stamp is used as mtime on the server, use timespec_trunc() to truncate the timestamp, using the right granularity from the superblock. This api will be transitioned to be y2038 safe along with vfs. Link: http://lkml.kernel.org/r/1491613030-11599-5-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann M: Ilya Dryomov M: "Yan, Zheng" M: Sage Weil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/rbd.c | 2 +- fs/ceph/mds_client.c | 4 +++- net/ceph/messenger.c | 6 ++++-- net/ceph/osd_client.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3670e8dd03fe..26812c1ed0cf 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - osd_req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762d76e6..1d3fa90d40b9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); + struct timespec ts; if (!req) return ERR_PTR(-ENOMEM); @@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_stamp = current_fs_time(mdsc->fsc->sb); + ktime_get_real_ts(&ts); + req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f76bb3332613..5766a6c896c4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1386,8 +1386,9 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec now = CURRENT_TIME; + struct timespec now; + ktime_get_real_ts(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), @@ -3176,8 +3177,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { - struct timespec now = CURRENT_TIME; + struct timespec now; struct timespec ts; + ktime_get_real_ts(&now); jiffies_to_timespec(interval, &ts); ts = timespec_add(con->last_keepalive_ack, ts); return timespec_compare(&now, &ts) >= 0; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e15ea9e4c495..242d7c0d92f8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3574,7 +3574,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - lreq->mtime = CURRENT_TIME; + ktime_get_real_ts(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -3632,7 +3632,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); -- cgit v1.2.3 From a88e99e976582814cf73acd04134f52a620f3416 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:22 -0700 Subject: fs: ufs: use ktime_get_real_ts64() for birthtime CURRENT_TIME is not y2038 safe. Replace it with ktime_get_real_ts64(). Inode time formats are already 64 bit long and accommodates time64_t. Link: http://lkml.kernel.org/r/1491613030-11599-6-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Cc: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/ialloc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 9774555b3721..d1dd8cc33179 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -176,6 +176,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; struct inode * inode; + struct timespec64 ts; unsigned cg, bit, i, j, start; struct ufs_inode_info *ufsi; int err = -ENOSPC; @@ -323,8 +324,9 @@ cg_found: lock_buffer(bh); ufs2_inode = (struct ufs2_inode *)bh->b_data; ufs2_inode += ufs_inotofsbo(inode->i_ino); - ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec); - ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec); + ktime_get_real_ts64(&ts); + ufs2_inode->ui_birthtime = cpu_to_fs64(sb, ts.tv_sec); + ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); if (sb->s_flags & MS_SYNCHRONOUS) -- cgit v1.2.3 From 607a11ad947794d0f4f2c0f73c654876d1abb9b1 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:25 -0700 Subject: fs: ubifs: replace CURRENT_TIME_SEC with current_time CURRENT_TIME_SEC is not y2038 safe. current_time() will be transitioned to use 64 bit time along with vfs in a separate patch. There is no plan to transition CURRENT_TIME_SEC to use y2038 safe time interfaces. current_time() returns timestamps according to the granularities set in the inode's super_block. The granularity check to call current_fs_time() or CURRENT_TIME_SEC is not required. Use current_time() directly to update inode timestamp. Use timespec_trunc during file system creation, before the first inode is created. Link: http://lkml.kernel.org/r/1491613030-11599-9-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Cc: Richard Weinberger Cc: Artem Bityutskiy Cc: Adrian Hunter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ubifs/dir.c | 12 ++++++------ fs/ubifs/file.c | 12 ++++++------ fs/ubifs/ioctl.c | 2 +- fs/ubifs/misc.h | 10 ---------- fs/ubifs/sb.c | 14 ++++++++++---- fs/ubifs/xattr.c | 6 +++--- 6 files changed, 26 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index b777bddaa1dd..19fcc9a3364e 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -121,7 +121,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = - ubifs_current_time(inode); + current_time(inode); inode->i_mapping->nrpages = 0; switch (mode & S_IFMT) { @@ -755,7 +755,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); ihold(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; dir->i_mtime = dir->i_ctime = inode->i_ctime; @@ -830,7 +830,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); drop_nlink(inode); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; @@ -934,7 +934,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); clear_nlink(inode); drop_nlink(dir); dir->i_size -= sz_change; @@ -1411,7 +1411,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the @i_ctime for inodes on a * rename. */ - time = ubifs_current_time(old_dir); + time = current_time(old_dir); old_inode->i_ctime = time; /* We must adjust parent link count when renaming directories */ @@ -1584,7 +1584,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, lock_4_inodes(old_dir, new_dir, NULL, NULL); - time = ubifs_current_time(old_dir); + time = current_time(old_dir); fst_inode->i_ctime = time; snd_inode->i_ctime = time; old_dir->i_mtime = old_dir->i_ctime = time; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index d9ae86f96df7..2cda3d67e2d0 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1196,7 +1196,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); err = ubifs_jnl_truncate(c, inode, old_size, new_size); @@ -1243,7 +1243,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1420,7 +1420,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, */ static int update_mctime(struct inode *inode) { - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1434,7 +1434,7 @@ static int update_mctime(struct inode *inode) return err; mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1511,7 +1511,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; @@ -1579,7 +1579,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index da519ba205f6..12b9eb5005ff 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -126,7 +126,7 @@ static int setflags(struct inode *inode, int flags) ui->flags = ioctl2ubifs(flags); ubifs_set_inode_flags(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 8ece6ca58c0b..caf83d68fb38 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -224,16 +224,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, return (void *)((struct ubifs_branch *)idx->branches)->key; } -/** - * ubifs_current_time - round current time to time granularity. - * @inode: inode - */ -static inline struct timespec ubifs_current_time(struct inode *inode) -{ - return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? - current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -} - /** * ubifs_tnc_lookup - look up a file-system node. * @c: UBIFS file-system description object diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 7f1ead29e727..8c25081a5109 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -84,6 +84,8 @@ static int create_default_filesystem(struct ubifs_info *c) int min_leb_cnt = UBIFS_MIN_LEB_CNT; long long tmp64, main_bytes; __le64 tmp_le64; + __le32 tmp_le32; + struct timespec ts; /* Some functions called from here depend on the @c->key_len filed */ c->key_len = UBIFS_SK_LEN; @@ -298,13 +300,17 @@ static int create_default_filesystem(struct ubifs_info *c) ino->ch.node_type = UBIFS_INO_NODE; ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); ino->nlink = cpu_to_le32(2); - tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + + ktime_get_real_ts(&ts); + ts = timespec_trunc(ts, DEFAULT_TIME_GRAN); + tmp_le64 = cpu_to_le64(ts.tv_sec); ino->atime_sec = tmp_le64; ino->ctime_sec = tmp_le64; ino->mtime_sec = tmp_le64; - ino->atime_nsec = 0; - ino->ctime_nsec = 0; - ino->mtime_nsec = 0; + tmp_le32 = cpu_to_le32(ts.tv_nsec); + ino->atime_nsec = tmp_le32; + ino->ctime_nsec = tmp_le32; + ino->mtime_nsec = tmp_le32; ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index efe00fcb8b75..3e53fdbf7997 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,7 +152,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -234,7 +234,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -488,7 +488,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); -- cgit v1.2.3 From b32c8c7648d2fa6ed689fc688ed74baa22f12ca0 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 8 May 2017 15:59:34 -0700 Subject: gfs2: replace CURRENT_TIME with current_time Link: http://lkml.kernel.org/r/20170420161852.0492bc3f@canb.auug.org.au Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/gfs2/bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3814a60e0aea..4d810be532dd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1072,7 +1072,7 @@ out_unlock: /* Every transaction boundary, we rewrite the dinode to keep its di_blocks current in case of failure. */ ip->i_inode.i_mtime = ip->i_inode.i_ctime = - CURRENT_TIME; + current_time(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); @@ -1293,7 +1293,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) gfs2_statfs_change(sdp, 0, +btotal, 0); gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, ip->i_inode.i_gid); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); -- cgit v1.2.3 From a9c42b33ed80968dd160e3be48c7e84ccf171cf9 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:00 -0700 Subject: dax: add tracepoints to dax_iomap_pte_fault() Patch series "second round of tracepoints for DAX". This second round of DAX tracepoint patches adds tracing to the PTE fault path (dax_iomap_pte_fault(), dax_pfn_mkwrite(), dax_load_hole(), dax_insert_mapping()) and to the writeback path (dax_writeback_mapping_range(), dax_writeback_one()). The purpose of this tracing is to give us a high level view of what DAX is doing, whether faults are being serviced by PMDs or PTEs, and by real storage or by zero pages covering holes. I do have some patches nearly ready which also add tracing to grab_mapping_entry() and dax_insert_mapping_entry(). These are more targeted at logging how we are interacting with the radix tree, how we use empty entries for locking, whether we "downgrade" huge zero pages to 4k PTE sized allocations, etc. In the end it seemed to me that this might be too detailed to have as constantly present tracepoints, but if anyone sees value in having tracepoints like this in the DAX code permanently (Jan?), please let me know and I'll add those last two patches. All these tracepoints were done to be consistent with the style of the XFS tracepoints and with the existing DAX PMD tracepoints. This patch (of 6): Add tracepoints to dax_iomap_pte_fault(), following the same logging conventions as the rest of DAX. Here is an example fault that initially tries to be serviced by the PMD fault handler but which falls back to PTEs because the VMA isn't large enough to hold a PMD: small-1086 [005] .... 71.140014: xfs_filemap_huge_fault: dev 259:0 ino 0x1003 small-1086 [005] .... 71.140027: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400 small-1086 [005] .... 71.140028: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 vm_start 0x10200000 vm_end 0x10500000 pgoff 0x220 max_pgoff 0x1400 FALLBACK small-1086 [005] .... 71.140035: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 small-1086 [005] .... 71.140396: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 MAJOR|NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 15 +++++++++++---- include/trace/events/fs_dax.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 43bbd6d1037d..f6c32d831af6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1150,13 +1150,16 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, int vmf_ret = 0; void *entry; + trace_dax_pte_fault(inode, vmf, vmf_ret); /* * Check whether offset isn't beyond end of file now. Caller is supposed * to hold locks serializing us with truncate / punch hole so this is * a reliable test. */ - if (pos >= i_size_read(inode)) - return VM_FAULT_SIGBUS; + if (pos >= i_size_read(inode)) { + vmf_ret = VM_FAULT_SIGBUS; + goto out; + } if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; @@ -1167,8 +1170,10 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, * that we never have to deal with more than a single extent here. */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); - if (error) - return dax_fault_return(error); + if (error) { + vmf_ret = dax_fault_return(error); + goto out; + } if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { vmf_ret = dax_fault_return(-EIO); /* fs corruption? */ goto finish_iomap; @@ -1252,6 +1257,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, */ ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } +out: + trace_dax_pte_fault_done(inode, vmf, vmf_ret); return vmf_ret; } diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index c566ddc87f73..cbcd7d64a18d 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -150,6 +150,47 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback); +DECLARE_EVENT_CLASS(dax_pte_fault_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), + TP_ARGS(inode, vmf, result), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(pgoff_t, pgoff) + __field(dev_t, dev) + __field(unsigned int, flags) + __field(int, result) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->flags = vmf->flags; + __entry->pgoff = vmf->pgoff; + __entry->result = result; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx pgoff %#lx %s", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE), + __entry->address, + __entry->pgoff, + __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE) + ) +) + +#define DEFINE_PTE_FAULT_EVENT(name) \ +DEFINE_EVENT(dax_pte_fault_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), \ + TP_ARGS(inode, vmf, result)) + +DEFINE_PTE_FAULT_EVENT(dax_pte_fault); +DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit v1.2.3 From c3ff68d7d1e6a24b7ad76d00ee583929858d4001 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:03 -0700 Subject: dax: add tracepoints to dax_pfn_mkwrite() Add tracepoints to dax_pfn_mkwrite(), following the same logging conventions as the rest of DAX. Here is an example PTE fault followed by a pfn_mkwrite: small_aligned-1094 [002] .... 374.084998: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 small_aligned-1094 [002] .... 374.085145: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 MAJOR|NOPAGE small_aligned-1094 [002] .... 374.085165: dax_pfn_mkwrite: dev 259:0 ino 0x1003 shared WRITE|MKWRITE|ALLOW_RETRY|KILLABLE|USER address 0x10400000 pgoff 0x200 NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-3-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 3 +++ include/trace/events/fs_dax.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index f6c32d831af6..d10524ab7e55 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -927,6 +927,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; void *entry, **slot; pgoff_t index = vmf->pgoff; @@ -936,6 +937,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) if (entry) put_unlocked_mapping_entry(mapping, index, entry); spin_unlock_irq(&mapping->tree_lock); + trace_dax_pfn_mkwrite_no_entry(inode, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); @@ -948,6 +950,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) */ finish_mkwrite_fault(vmf); put_locked_mapping_entry(mapping, index, entry); + trace_dax_pfn_mkwrite(inode, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index cbcd7d64a18d..b5a520961f8d 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -190,6 +190,8 @@ DEFINE_EVENT(dax_pte_fault_class, name, \ DEFINE_PTE_FAULT_EVENT(dax_pte_fault); DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); #endif /* _TRACE_FS_DAX_H */ -- cgit v1.2.3 From 678c9fd0430a1431bd9901c76f41e04fcb3eac87 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:07 -0700 Subject: dax: add tracepoints to dax_load_hole() Add tracepoints to dax_load_hole(), following the same logging conventions as the rest of DAX. Here is the logging generated by a PTE read from a hole: read-1075 [002] .... 62.362108: dax_pte_fault: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 read-1075 [002] .... 62.362140: dax_load_hole: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 NOPAGE read-1075 [002] .... 62.362141: dax_pte_fault_done: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10480000 pgoff 0x280 NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-4-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 16 +++++++++++----- include/trace/events/fs_dax.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index d10524ab7e55..36fafff2e82f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -509,21 +509,25 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, static int dax_load_hole(struct address_space *mapping, void **entry, struct vm_fault *vmf) { + struct inode *inode = mapping->host; struct page *page; int ret; /* Hole page already exists? Return it... */ if (!radix_tree_exceptional_entry(*entry)) { page = *entry; - goto out; + goto finish_fault; } /* This will replace locked radix tree entry with a hole page */ page = find_or_create_page(mapping, vmf->pgoff, vmf->gfp_mask | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; - out: + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } + +finish_fault: vmf->page = page; ret = finish_fault(vmf); vmf->page = NULL; @@ -531,8 +535,10 @@ static int dax_load_hole(struct address_space *mapping, void **entry, if (!ret) { /* Grab reference for PTE that is now referencing the page */ get_page(page); - return VM_FAULT_NOPAGE; + ret = VM_FAULT_NOPAGE; } +out: + trace_dax_load_hole(inode, vmf, ret); return ret; } diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index b5a520961f8d..2f15dfea7fb1 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -192,6 +192,7 @@ DEFINE_PTE_FAULT_EVENT(dax_pte_fault); DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); +DEFINE_PTE_FAULT_EVENT(dax_load_hole); #endif /* _TRACE_FS_DAX_H */ -- cgit v1.2.3 From d14a3f48a152b75a1e690d443f509c07c7b06c0e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:10 -0700 Subject: dax: add tracepoints to dax_writeback_mapping_range() Add tracepoints to dax_writeback_mapping_range(), following the same logging conventions as the rest of DAX. Here is an example writeback call: msync-1085 [006] .... 200.902565: dax_writeback_range: dev 259:0 ino 0x1003 pgoff 0x200-0x2ff msync-1085 [006] .... 200.902579: dax_writeback_range_done: dev 259:0 ino 0x1003 pgoff 0x200-0x2ff [ross.zwisler@linux.intel.com: fix regression in dax_writeback_mapping_range()] Link: http://lkml.kernel.org/r/20170314215358.31451-1-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/20170221195116.13278-5-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 12 +++++++----- include/trace/events/fs_dax.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 36fafff2e82f..7cf2686761e2 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -863,6 +863,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, start_index = wbc->range_start >> PAGE_SHIFT; end_index = wbc->range_end >> PAGE_SHIFT; + trace_dax_writeback_range(inode, start_index, end_index); + tag_pages_for_writeback(mapping, start_index, end_index); pagevec_init(&pvec, 0); @@ -882,14 +884,14 @@ int dax_writeback_mapping_range(struct address_space *mapping, ret = dax_writeback_one(bdev, dax_dev, mapping, indices[i], pvec.pages[i]); - if (ret < 0) { - put_dax(dax_dev); - return ret; - } + if (ret < 0) + goto out; } } +out: put_dax(dax_dev); - return 0; + trace_dax_writeback_range_done(inode, start_index, end_index); + return (ret < 0 ? ret : 0); } EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 2f15dfea7fb1..9afe8c8f0bef 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -194,6 +194,38 @@ DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); DEFINE_PTE_FAULT_EVENT(dax_load_hole); +DECLARE_EVENT_CLASS(dax_writeback_range_class, + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), + TP_ARGS(inode, start_index, end_index), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, start_index) + __field(pgoff_t, end_index) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->start_index = start_index; + __entry->end_index = end_index; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx-%#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->start_index, + __entry->end_index + ) +) + +#define DEFINE_WRITEBACK_RANGE_EVENT(name) \ +DEFINE_EVENT(dax_writeback_range_class, name, \ + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index),\ + TP_ARGS(inode, start_index, end_index)) + +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range); +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range_done); + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit v1.2.3 From f9bc3a07539bc80b4da9ff2f5d6c13d5c7a4f073 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:13 -0700 Subject: dax: add tracepoint to dax_writeback_one() Add a tracepoint to dax_writeback_one(), following the same logging conventions as the rest of DAX. Here is an example range writeback which ends up flushing one PMD and one PTE: test-1265 [003] .... 496.615250: dax_writeback_range: dev 259:0 ino 0x1003 pgoff 0x0-0x7ffffffffffff test-1265 [003] .... 496.616263: dax_writeback_one: dev 259:0 ino 0x1003 pgoff 0x0 pglen 0x200 test-1265 [003] .... 496.616270: dax_writeback_one: dev 259:0 ino 0x1003 pgoff 0x305 pglen 0x1 test-1265 [003] .... 496.616272: dax_writeback_range_done: dev 259:0 ino 0x1003 pgoff 0x0-0x7ffffffffffff [akpm@linux-foundation.org: struct blk_dax_ctl has disappeared] Link: http://lkml.kernel.org/r/20170221195116.13278-6-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 1 + include/trace/events/fs_dax.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 7cf2686761e2..9bec30e06211 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -823,6 +823,7 @@ static int dax_writeback_one(struct block_device *bdev, spin_lock_irq(&mapping->tree_lock); radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); spin_unlock_irq(&mapping->tree_lock); + trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); dax_unlock: dax_read_unlock(id); put_locked_mapping_entry(mapping, index, entry); diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 9afe8c8f0bef..292a4719edd0 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -226,6 +226,30 @@ DEFINE_EVENT(dax_writeback_range_class, name, \ DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range); DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range_done); +TRACE_EVENT(dax_writeback_one, + TP_PROTO(struct inode *inode, pgoff_t pgoff, pgoff_t pglen), + TP_ARGS(inode, pgoff, pglen), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, pgoff) + __field(pgoff_t, pglen) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgoff = pgoff; + __entry->pglen = pglen; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx pglen %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->pglen + ) +) + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ -- cgit v1.2.3 From b4440734583c3addf80558e8fde2b61e2d76328c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 8 May 2017 16:00:16 -0700 Subject: dax: add tracepoint to dax_insert_mapping() Add a tracepoint to dax_insert_mapping(), following the same logging conventions as the rest of DAX. This tracepoint, along with the one in dax_load_hole(), lets us know how a DAX PTE fault was serviced. Here is an example DAX fault that inserts a PTE mapping: small-1126 [007] .... 145.451604: dax_pte_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 small-1126 [007] .... 145.452317: dax_insert_mapping: dev 259:0 ino 0x1003 shared write address 0x10420000 radix_entry 0x100006 small-1126 [007] .... 145.452399: dax_pte_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10420000 pgoff 0x220 MAJOR|NOPAGE Link: http://lkml.kernel.org/r/20170221195116.13278-7-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Dan Williams Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 1 + include/trace/events/fs_dax.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 9bec30e06211..66d79067eedf 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -925,6 +925,7 @@ static int dax_insert_mapping(struct address_space *mapping, return PTR_ERR(ret); *entryp = ret; + trace_dax_insert_mapping(mapping->host, vmf, ret); return vm_insert_mixed(vma, vaddr, pfn); } diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 292a4719edd0..08bb3ed18dcc 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -194,6 +194,36 @@ DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); DEFINE_PTE_FAULT_EVENT(dax_load_hole); +TRACE_EVENT(dax_insert_mapping, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), + TP_ARGS(inode, vmf, radix_entry), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(void *, radix_entry) + __field(dev_t, dev) + __field(int, write) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->write = vmf->flags & FAULT_FLAG_WRITE; + __entry->radix_entry = radix_entry; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx radix_entry %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __entry->write ? "write" : "read", + __entry->address, + (unsigned long)__entry->radix_entry + ) +) + DECLARE_EVENT_CLASS(dax_writeback_range_class, TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), TP_ARGS(inode, start_index, end_index), -- cgit v1.2.3