diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-17 16:49:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-17 16:49:31 -0700 |
commit | 6ada4e2826794bdf8d88f938a9ced0b80894b037 (patch) | |
tree | 4a39e46d6c4502ae9346566b2e384dcc2205c014 /fs | |
parent | 9bd553929f68921be0f2014dd06561e0c8249a0d (diff) | |
parent | 1e9264192961aa519595170aa8b0f7651a2ad28e (diff) | |
download | linux-6ada4e2826794bdf8d88f938a9ced0b80894b037.tar.bz2 |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- a few misc things
- a few Y2038 fixes
- ntfs fixes
- arch/sh tweaks
- ocfs2 updates
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (111 commits)
mm/hmm.c: remove unused variables align_start and align_end
fs/userfaultfd.c: remove redundant pointer uwq
mm, vmacache: hash addresses based on pmd
mm/list_lru: introduce list_lru_shrink_walk_irq()
mm/list_lru.c: pass struct list_lru_node* as an argument to __list_lru_walk_one()
mm/list_lru.c: move locking from __list_lru_walk_one() to its caller
mm/list_lru.c: use list_lru_walk_one() in list_lru_walk_node()
mm, swap: make CONFIG_THP_SWAP depend on CONFIG_SWAP
mm/sparse: delete old sparse_init and enable new one
mm/sparse: add new sparse_init_nid() and sparse_init()
mm/sparse: move buffer init/fini to the common place
mm/sparse: use the new sparse buffer functions in non-vmemmap
mm/sparse: abstract sparse buffer allocations
mm/hugetlb.c: don't zero 1GiB bootmem pages
mm, page_alloc: double zone's batchsize
mm/oom_kill.c: document oom_lock
mm/hugetlb: remove gigantic page support for HIGHMEM
mm, oom: remove sleep from under oom_lock
kernel/dma: remove unsupported gfp_mask parameter from dma_alloc_from_contiguous()
mm/cma: remove unsupported gfp_mask parameter from cma_alloc()
...
Diffstat (limited to 'fs')
40 files changed, 283 insertions, 231 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 628f1aef34b0..4dd6faab02bb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3102,7 +3102,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, - bio, 0, bio_flags, 0, prev_em_start); + bio, 0, bio_flags, REQ_RAHEAD, prev_em_start); put_page(pages[index]); } } diff --git a/fs/buffer.c b/fs/buffer.c index c8c2b7d8b8d6..4cc679d5bf58 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -45,6 +45,7 @@ #include <linux/mpage.h> #include <linux/bit_spinlock.h> #include <linux/pagevec.h> +#include <linux/sched/mm.h> #include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -813,12 +814,16 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry) { struct buffer_head *bh, *head; - gfp_t gfp = GFP_NOFS; + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; long offset; + struct mem_cgroup *memcg; if (retry) gfp |= __GFP_NOFAIL; + memcg = get_mem_cgroup_from_page(page); + memalloc_use_memcg(memcg); + head = NULL; offset = PAGE_SIZE; while ((offset -= size) >= 0) { @@ -835,6 +840,9 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, /* Link the buffer to its page */ set_bh_page(bh, page, offset); } +out: + memalloc_unuse_memcg(); + mem_cgroup_put(memcg); return head; /* * In case anything failed, we just free everything we got. @@ -848,7 +856,7 @@ no_grow: } while (head); } - return NULL; + goto out; } EXPORT_SYMBOL_GPL(alloc_page_buffers); diff --git a/fs/dcache.c b/fs/dcache.c index 8d2ec4898c2b..2e7e8d85e9b4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -292,7 +292,8 @@ void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry spin_unlock(&dentry->d_lock); name->name = p->name; } else { - memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + memcpy(name->inline_name, dentry->d_iname, + dentry->d_name.len + 1); spin_unlock(&dentry->d_lock); name->name = name->inline_name; } diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 047c327a6b23..28b2609f25c1 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -126,7 +126,6 @@ static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &ext2_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP; return 0; } #else diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1fc013f3d944..0f0edd1cd0cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3062,7 +3062,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); /* symlink.c */ extern const struct inode_operations ext4_encrypted_symlink_inode_operations; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7f8023340eb8..69d65d49837b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -374,7 +374,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; } else { vma->vm_ops = &ext4_file_vm_ops; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8f6ad7667974..d0dd585add6a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3325,7 +3325,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3340,7 +3341,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 19b87a8de6ff..f461d75ac049 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -98,7 +98,7 @@ static void mpage_end_io(struct bio *bio) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -259,7 +259,8 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, + is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f931d699287..b7c9b58acf3e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1421,6 +1421,11 @@ out: /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. + * + * Note that the aops->readpages() function is ONLY used for read-ahead. If + * this function ever deviates from doing just read-ahead, it should either + * use ->readpage() or do the necessary surgery to decouple ->readpages() + * readom read-ahead. */ static int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index cb8374af08a6..33b8423ef0c9 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -19,7 +19,7 @@ #define HOSTFS_ATTR_ATIME_SET 128 #define HOSTFS_ATTR_MTIME_SET 256 -/* These two are unused by hostfs. */ +/* This one is unused by hostfs. */ #define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ #define HOSTFS_ATTR_ATTR_FLAG 1024 diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 2a153aed4c19..ab2e7cc2ff33 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -334,16 +334,23 @@ long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); * local time (HPFS) to GMT (Unix) */ -static inline time_t local_to_gmt(struct super_block *s, time32_t t) +static inline time64_t local_to_gmt(struct super_block *s, time32_t t) { extern struct timezone sys_tz; return t + sys_tz.tz_minuteswest * 60 + hpfs_sb(s)->sb_timeshift; } -static inline time32_t gmt_to_local(struct super_block *s, time_t t) +static inline time32_t gmt_to_local(struct super_block *s, time64_t t) { extern struct timezone sys_tz; - return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; + t = t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; + + return clamp_t(time64_t, t, 0, U32_MAX); +} + +static inline time32_t local_get_seconds(struct super_block *s) +{ + return gmt_to_local(s, ktime_get_real_seconds()); } /* diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index a3615e4c730d..082b7c76dd0c 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -11,7 +11,7 @@ static void hpfs_update_directory_times(struct inode *dir) { - time_t t = get_seconds(); + time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb)); if (t == dir->i_mtime.tv_sec && t == dir->i_ctime.tv_sec) return; @@ -50,7 +50,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) /*dee.archive = 0;*/ dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) goto bail2; @@ -91,7 +91,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) dnode->root_dnode = 1; dnode->up = cpu_to_le32(fno); de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0); - de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + de->creation_date = de->write_date = de->read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); if (!(mode & 0222)) de->read_only = 1; de->first = de->directory = 1; /*de->hidden = de->system = 0;*/ @@ -151,7 +151,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) @@ -238,7 +238,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) @@ -314,7 +314,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) diff --git a/fs/mpage.c b/fs/mpage.c index b73638db9866..c820dc9bebab 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -133,6 +133,17 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) } while (page_bh != head); } +struct mpage_readpage_args { + struct bio *bio; + struct page *page; + unsigned int nr_pages; + bool is_readahead; + sector_t last_block_in_bio; + struct buffer_head map_bh; + unsigned long first_logical_block; + get_block_t *get_block; +}; + /* * This is the worker routine which does all the work of mapping the disk * blocks and constructs largest possible bios, submits them for IO if the @@ -142,16 +153,14 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) * represent the validity of its disk mapping and to decide when to do the next * get_block() call. */ -static struct bio * -do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, struct buffer_head *map_bh, - unsigned long *first_logical_block, get_block_t get_block, - gfp_t gfp) +static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) { + struct page *page = args->page; struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; + struct buffer_head *map_bh = &args->map_bh; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -161,14 +170,24 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, struct block_device *bdev = NULL; int length; int fully_mapped = 1; + int op_flags; unsigned nblocks; unsigned relative_block; + gfp_t gfp; + + if (args->is_readahead) { + op_flags = REQ_RAHEAD; + gfp = readahead_gfp_mask(page->mapping); + } else { + op_flags = 0; + gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + } if (page_has_buffers(page)) goto confused; block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); - last_block = block_in_file + nr_pages * blocks_per_page; + last_block = block_in_file + args->nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -178,9 +197,10 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, * Map blocks using the result from the previous get_blocks call first. */ nblocks = map_bh->b_size >> blkbits; - if (buffer_mapped(map_bh) && block_in_file > *first_logical_block && - block_in_file < (*first_logical_block + nblocks)) { - unsigned map_offset = block_in_file - *first_logical_block; + if (buffer_mapped(map_bh) && + block_in_file > args->first_logical_block && + block_in_file < (args->first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - args->first_logical_block; unsigned last = nblocks - map_offset; for (relative_block = 0; ; relative_block++) { @@ -208,9 +228,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, if (block_in_file < last_block) { map_bh->b_size = (last_block-block_in_file) << blkbits; - if (get_block(inode, block_in_file, map_bh, 0)) + if (args->get_block(inode, block_in_file, map_bh, 0)) goto confused; - *first_logical_block = block_in_file; + args->first_logical_block = block_in_file; } if (!buffer_mapped(map_bh)) { @@ -273,43 +293,45 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, /* * This page will go to BIO. Do we need to send this BIO off first? */ - if (bio && (*last_block_in_bio != blocks[0] - 1)) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args->bio && (args->last_block_in_bio != blocks[0] - 1)) + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); alloc_new: - if (bio == NULL) { + if (args->bio == NULL) { if (first_hole == blocks_per_page) { if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9), page)) goto out; } - bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - min_t(int, nr_pages, BIO_MAX_PAGES), gfp); - if (bio == NULL) + args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), + min_t(int, args->nr_pages, + BIO_MAX_PAGES), + gfp); + if (args->bio == NULL) goto confused; } length = first_hole << blkbits; - if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (bio_add_page(args->bio, page, length, 0) < length) { + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); goto alloc_new; } - relative_block = block_in_file - *first_logical_block; + relative_block = block_in_file - args->first_logical_block; nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_page)) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); else - *last_block_in_bio = blocks[blocks_per_page - 1]; + args->last_block_in_bio = blocks[blocks_per_page - 1]; out: - return bio; + return args->bio; confused: - if (bio) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args->bio) + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); if (!PageUptodate(page)) - block_read_full_page(page, get_block); + block_read_full_page(page, args->get_block); else unlock_page(page); goto out; @@ -363,15 +385,12 @@ int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block) { - struct bio *bio = NULL; + struct mpage_readpage_args args = { + .get_block = get_block, + .is_readahead = true, + }; unsigned page_idx; - sector_t last_block_in_bio = 0; - struct buffer_head map_bh; - unsigned long first_logical_block = 0; - gfp_t gfp = readahead_gfp_mask(mapping); - map_bh.b_state = 0; - map_bh.b_size = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = lru_to_page(pages); @@ -379,18 +398,16 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, page->index, - gfp)) { - bio = do_mpage_readpage(bio, page, - nr_pages - page_idx, - &last_block_in_bio, &map_bh, - &first_logical_block, - get_block, gfp); + readahead_gfp_mask(mapping))) { + args.page = page; + args.nr_pages = nr_pages - page_idx; + args.bio = do_mpage_readpage(&args); } put_page(page); } BUG_ON(!list_empty(pages)); - if (bio) - mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args.bio) + mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); return 0; } EXPORT_SYMBOL(mpage_readpages); @@ -400,18 +417,15 @@ EXPORT_SYMBOL(mpage_readpages); */ int mpage_readpage(struct page *page, get_block_t get_block) { - struct bio *bio = NULL; - sector_t last_block_in_bio = 0; - struct buffer_head map_bh; - unsigned long first_logical_block = 0; - gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + struct mpage_readpage_args args = { + .page = page, + .nr_pages = 1, + .get_block = get_block, + }; - map_bh.b_state = 0; - map_bh.b_size = 0; - bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, - &map_bh, &first_logical_block, get_block, gfp); - if (bio) - mpage_bio_submit(REQ_OP_READ, 0, bio); + args.bio = do_mpage_readpage(&args); + if (args.bio) + mpage_bio_submit(REQ_OP_READ, 0, args.bio); return 0; } EXPORT_SYMBOL(mpage_readpage); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index e2bea2ac5dfb..a6365e6bc047 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -384,8 +384,9 @@ out_err: static int __init dnotify_init(void) { - dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); - dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC); + dnotify_struct_cache = KMEM_CACHE(dnotify_struct, + SLAB_PANIC|SLAB_ACCOUNT); + dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); if (IS_ERR(dnotify_group)) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f90842efea13..eb4e75175cfb 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/audit.h> +#include <linux/sched/mm.h> #include "fanotify.h" @@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const struct path *path) { - struct fanotify_event_info *event; - gfp_t gfp = GFP_KERNEL; + struct fanotify_event_info *event = NULL; + gfp_t gfp = GFP_KERNEL_ACCOUNT; /* * For queues with unlimited length lost events are not expected and @@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, if (group->max_events == UINT_MAX) gfp |= __GFP_NOFAIL; + /* Whoever is interested in the event, pays for the allocation. */ + memalloc_use_memcg(group->memcg); + if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) - return NULL; + goto out; event = &pevent->fae; pevent->response = 0; goto init; } event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) - return NULL; + goto out; init: __maybe_unused fsnotify_init_event(&event->fse, inode, mask); event->tgid = get_pid(task_tgid(current)); @@ -174,6 +178,8 @@ init: __maybe_unused event->path.mnt = NULL; event->path.dentry = NULL; } +out: + memalloc_unuse_memcg(); return event; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d736a833fe39..69054886915b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,7 @@ #include <linux/uaccess.h> #include <linux/compat.h> #include <linux/sched/signal.h> +#include <linux/memcontrol.h> #include <asm/ioctls.h> @@ -731,6 +732,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); + group->memcg = get_mem_cgroup_from_mm(current->mm); oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); if (unlikely(!oevent)) { @@ -932,7 +934,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, + SLAB_PANIC|SLAB_ACCOUNT); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = diff --git a/fs/notify/group.c b/fs/notify/group.c index aa5468f23e45..c03b83662876 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -22,6 +22,7 @@ #include <linux/srcu.h> #include <linux/rculist.h> #include <linux/wait.h> +#include <linux/memcontrol.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" @@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) if (group->ops->free_group_priv) group->ops->free_group_priv(group); + mem_cgroup_put(group->memcg); + kfree(group); } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 9ab6dde38a14..f4184b4f3815 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -31,6 +31,7 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> +#include <linux/sched/mm.h> #include "inotify.h" @@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group, i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); - event = kmalloc(alloc_len, GFP_KERNEL); + /* Whoever is interested in the event, pays for the allocation. */ + memalloc_use_memcg(group->memcg); + event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT); + memalloc_unuse_memcg(); + if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 6f48d325c350..ac6978d3208c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -38,6 +38,7 @@ #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/wait.h> +#include <linux/memcontrol.h> #include "inotify.h" #include "../fdinfo.h" @@ -639,6 +640,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) oevent->name_len = 0; group->max_events = max_events; + group->memcg = get_mem_cgroup_from_mm(current->mm); spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); @@ -815,7 +817,8 @@ static int __init inotify_user_setup(void) BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); - inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); + inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, + SLAB_PANIC|SLAB_ACCOUNT); inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 3a2e509c77c5..8946130c87ad 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -93,13 +93,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; - local_irq_save(flags); kaddr = kmap_atomic(page); memset(kaddr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); flush_dcache_page(page); kunmap_atomic(kaddr); - local_irq_restore(flags); } } else { clear_buffer_uptodate(bh); @@ -146,13 +144,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - local_irq_save(flags); kaddr = kmap_atomic(page); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); kunmap_atomic(kaddr); - local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); @@ -926,7 +922,7 @@ static int ntfs_write_mst_block(struct page *page, ntfs_volume *vol = ni->vol; u8 *kaddr; unsigned int rec_size = ni->itype.index.block_size; - ntfs_inode *locked_nis[PAGE_SIZE / rec_size]; + ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE]; struct buffer_head *bh, *head, *tbh, *rec_start_bh; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; runlist_element *rl; @@ -935,6 +931,9 @@ static int ntfs_write_mst_block(struct page *page, bool sync, is_mft, page_is_dirty, rec_is_dirty; unsigned char bh_size_bits; + if (WARN_ON(rec_size < NTFS_BLOCK_SIZE)) + return -EINVAL; + ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " "0x%lx.", vi->i_ino, ni->type, page->index); BUG_ON(!NInoNonResident(ni)); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index fbd0090d7d0c..df7c32b5fac7 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -128,6 +128,7 @@ static inline void handle_bounds_compressed_page(struct page *page, /** * ntfs_decompress - decompress a compression block into an array of pages * @dest_pages: destination array of pages + * @completed_pages: scratch space to track completed pages * @dest_index: current index into @dest_pages (IN/OUT) * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT) * @dest_max_index: maximum index into @dest_pages (IN) @@ -162,10 +163,10 @@ static inline void handle_bounds_compressed_page(struct page *page, * Note to hackers: This function may not sleep until it has finished accessing * the compression block @cb_start as it is a per-CPU buffer. */ -static int ntfs_decompress(struct page *dest_pages[], int *dest_index, - int *dest_ofs, const int dest_max_index, const int dest_max_ofs, - const int xpage, char *xpage_done, u8 *const cb_start, - const u32 cb_size, const loff_t i_size, +static int ntfs_decompress(struct page *dest_pages[], int completed_pages[], + int *dest_index, int *dest_ofs, const int dest_max_index, + const int dest_max_ofs, const int xpage, char *xpage_done, + u8 *const cb_start, const u32 cb_size, const loff_t i_size, const s64 initialized_size) { /* @@ -190,9 +191,6 @@ static int ntfs_decompress(struct page *dest_pages[], int *dest_index, /* Variables for tag and token parsing. */ u8 tag; /* Current tag. */ int token; /* Loop counter for the eight tokens in tag. */ - - /* Need this because we can't sleep, so need two stages. */ - int completed_pages[dest_max_index - *dest_index + 1]; int nr_completed_pages = 0; /* Default error code. */ @@ -516,6 +514,7 @@ int ntfs_read_compressed_block(struct page *page) unsigned int cb_clusters, cb_max_ofs; int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; struct page **pages; + int *completed_pages; unsigned char xpage_done = 0; ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " @@ -528,14 +527,16 @@ int ntfs_read_compressed_block(struct page *page) BUG_ON(ni->name_len); pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); + completed_pages = kmalloc_array(nr_pages + 1, sizeof(int), GFP_NOFS); /* Allocate memory to store the buffer heads we need. */ bhs_size = cb_size / block_size * sizeof(struct buffer_head *); bhs = kmalloc(bhs_size, GFP_NOFS); - if (unlikely(!pages || !bhs)) { + if (unlikely(!pages || !bhs || !completed_pages)) { kfree(bhs); kfree(pages); + kfree(completed_pages); unlock_page(page); ntfs_error(vol->sb, "Failed to allocate internal buffers."); return -ENOMEM; @@ -562,6 +563,7 @@ int ntfs_read_compressed_block(struct page *page) if (xpage >= max_page) { kfree(bhs); kfree(pages); + kfree(completed_pages); zero_user(page, 0, PAGE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); @@ -854,10 +856,10 @@ lock_retry_remap: unsigned int prev_cur_page = cur_page; ntfs_debug("Found compressed compression block."); - err = ntfs_decompress(pages, &cur_page, &cur_ofs, - cb_max_page, cb_max_ofs, xpage, &xpage_done, - cb_pos, cb_size - (cb_pos - cb), i_size, - initialized_size); + err = ntfs_decompress(pages, completed_pages, &cur_page, + &cur_ofs, cb_max_page, cb_max_ofs, xpage, + &xpage_done, cb_pos, cb_size - (cb_pos - cb), + i_size, initialized_size); /* * We can sleep from now on, lock already dropped by * ntfs_decompress(). @@ -912,6 +914,7 @@ lock_retry_remap: /* We no longer need the list of pages. */ kfree(pages); + kfree(completed_pages); /* If we have completed the requested page, we return success. */ if (likely(xpage_done)) @@ -956,5 +959,6 @@ err_out: } } kfree(pages); + kfree(completed_pages); return -EIO; } diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index decaf75d1cd5..bd3221cbdd95 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -667,18 +667,18 @@ static int ntfs_read_locked_inode(struct inode *vi) * mtime is the last change of the data within the file. Not changed * when only metadata is changed, e.g. a rename doesn't affect mtime. */ - vi->i_mtime = timespec_to_timespec64(ntfs2utc(si->last_data_change_time)); + vi->i_mtime = ntfs2utc(si->last_data_change_time); /* * ctime is the last change of the metadata of the file. This obviously * always changes, when mtime is changed. ctime can be changed on its * own, mtime is then not changed, e.g. when a file is renamed. */ - vi->i_ctime = timespec_to_timespec64(ntfs2utc(si->last_mft_change_time)); + vi->i_ctime = ntfs2utc(si->last_mft_change_time); /* * Last access to the data within the file. Not changed during a rename * for example but changed whenever the file is written to. */ - vi->i_atime = timespec_to_timespec64(ntfs2utc(si->last_access_time)); + vi->i_atime = ntfs2utc(si->last_access_time); /* Find the attribute list attribute if present. */ ntfs_attr_reinit_search_ctx(ctx); @@ -2997,7 +2997,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si = (STANDARD_INFORMATION*)((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset)); /* Update the access times if they have changed. */ - nt = utc2ntfs(timespec64_to_timespec(vi->i_mtime)); + nt = utc2ntfs(vi->i_mtime); if (si->last_data_change_time != nt) { ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3006,7 +3006,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_data_change_time = nt; modified = true; } - nt = utc2ntfs(timespec64_to_timespec(vi->i_ctime)); + nt = utc2ntfs(vi->i_ctime); if (si->last_mft_change_time != nt) { ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3015,7 +3015,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_mft_change_time = nt; modified = true; } - nt = utc2ntfs(timespec64_to_timespec(vi->i_atime)); + nt = utc2ntfs(vi->i_atime); if (si->last_access_time != nt) { ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 32c523cf5a2d..fb14d17666c8 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -35,6 +35,8 @@ #include "mft.h" #include "ntfs.h" +#define MAX_BHS (PAGE_SIZE / NTFS_BLOCK_SIZE) + /** * map_mft_record_page - map the page in which a specific mft record resides * @ni: ntfs inode whose mft record page to map @@ -469,7 +471,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, struct page *page; unsigned int blocksize = vol->sb->s_blocksize; int max_bhs = vol->mft_record_size / blocksize; - struct buffer_head *bhs[max_bhs]; + struct buffer_head *bhs[MAX_BHS]; struct buffer_head *bh, *head; u8 *kmirr; runlist_element *rl; @@ -479,6 +481,8 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, ntfs_debug("Entering for inode 0x%lx.", mft_no); BUG_ON(!max_bhs); + if (WARN_ON(max_bhs > MAX_BHS)) + return -EINVAL; if (unlikely(!vol->mftmirr_ino)) { /* This could happen during umount... */ err = ntfs_sync_mft_mirror_umount(vol, mft_no, m); @@ -674,7 +678,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) unsigned int blocksize = vol->sb->s_blocksize; unsigned char blocksize_bits = vol->sb->s_blocksize_bits; int max_bhs = vol->mft_record_size / blocksize; - struct buffer_head *bhs[max_bhs]; + struct buffer_head *bhs[MAX_BHS]; struct buffer_head *bh, *head; runlist_element *rl; unsigned int block_start, block_end, m_start, m_end; @@ -684,6 +688,10 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) BUG_ON(NInoAttr(ni)); BUG_ON(!max_bhs); BUG_ON(!PageLocked(page)); + if (WARN_ON(max_bhs > MAX_BHS)) { + err = -EINVAL; + goto err_out; + } /* * If the ntfs_inode is clean no need to do anything. If it is dirty, * mark it as clean now so that it can be redirtied later on if needed. diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h index 01233989d5d1..24cd719f1fd2 100644 --- a/fs/ntfs/time.h +++ b/fs/ntfs/time.h @@ -36,16 +36,16 @@ * Convert the Linux UTC time @ts to its corresponding NTFS time and return * that in little endian format. * - * Linux stores time in a struct timespec consisting of a time_t (long at - * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second - * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of - * 1-nano-second intervals since the value of tv_sec. + * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec + * and a long tv_nsec where tv_sec is the number of 1-second intervals since + * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second + * intervals since the value of tv_sec. * * NTFS uses Microsoft's standard time format which is stored in a s64 and is * measured as the number of 100-nano-second intervals since 1st January 1601, * 00:00:00 UTC. */ -static inline sle64 utc2ntfs(const struct timespec ts) +static inline sle64 utc2ntfs(const struct timespec64 ts) { /* * Convert the seconds to 100ns intervals, add the nano-seconds @@ -63,7 +63,10 @@ static inline sle64 utc2ntfs(const struct timespec ts) */ static inline sle64 get_current_ntfs_time(void) { - return utc2ntfs(current_kernel_time()); + struct timespec64 ts; + + ktime_get_coarse_real_ts64(&ts); + return utc2ntfs(ts); } /** @@ -73,18 +76,18 @@ static inline sle64 get_current_ntfs_time(void) * Convert the little endian NTFS time @time to its corresponding Linux UTC * time and return that in cpu format. * - * Linux stores time in a struct timespec consisting of a time_t (long at - * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second - * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of - * 1-nano-second intervals since the value of tv_sec. + * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec + * and a long tv_nsec where tv_sec is the number of 1-second intervals since + * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second + * intervals since the value of tv_sec. * * NTFS uses Microsoft's standard time format which is stored in a s64 and is * measured as the number of 100 nano-second intervals since 1st January 1601, * 00:00:00 UTC. */ -static inline struct timespec ntfs2utc(const sle64 time) +static inline struct timespec64 ntfs2utc(const sle64 time) { - struct timespec ts; + struct timespec64 ts; /* Subtract the NTFS time offset. */ u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0f157bbd3e0f..a342f008e42f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -932,13 +932,11 @@ static int ocfs2_validate_extent_block(struct super_block *sb, goto bail; } - if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) rc = ocfs2_error(sb, "Extent block #%llu has an invalid h_fs_generation of #%u\n", (unsigned long long)bh->b_blocknr, le32_to_cpu(eb->h_fs_generation)); - goto bail; - } bail: return rc; } @@ -1481,19 +1479,17 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et, while(le16_to_cpu(el->l_tree_depth) > 1) { if (le16_to_cpu(el->l_next_free_rec) == 0) { - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has empty extent list (next_free_rec == 0)\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); - status = -EIO; + status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has empty extent list (next_free_rec == 0)\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); goto bail; } i = le16_to_cpu(el->l_next_free_rec) - 1; blkno = le64_to_cpu(el->l_recs[i].e_blkno); if (!blkno) { - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has extent list where extent # %d has no physical block start\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i); - status = -EIO; + status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has extent list where extent # %d has no physical block start\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i); goto bail; } @@ -1598,10 +1594,8 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, * the new data. */ ret = ocfs2_add_branch(handle, et, bh, last_eb_bh, meta_ac); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - goto out; - } out: if (final_depth) @@ -3214,11 +3208,10 @@ rightmost_no_delete: goto rightmost_no_delete; if (le16_to_cpu(el->l_next_free_rec) == 0) { - ret = -EIO; - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has empty extent block at %llu\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), - (unsigned long long)le64_to_cpu(eb->h_blkno)); + ret = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has empty extent block at %llu\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), + (unsigned long long)le64_to_cpu(eb->h_blkno)); goto out; } @@ -4411,12 +4404,11 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, le16_to_cpu(new_el->l_count)) { bh = path_leaf_bh(left_path); eb = (struct ocfs2_extent_block *)bh->b_data; - ocfs2_error(sb, - "Extent block #%llu has an invalid l_next_free_rec of %d. It should have matched the l_count of %d\n", - (unsigned long long)le64_to_cpu(eb->h_blkno), - le16_to_cpu(new_el->l_next_free_rec), - le16_to_cpu(new_el->l_count)); - status = -EINVAL; + status = ocfs2_error(sb, + "Extent block #%llu has an invalid l_next_free_rec of %d. It should have matched the l_count of %d\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec), + le16_to_cpu(new_el->l_count)); goto free_left_path; } rec = &new_el->l_recs[ @@ -4466,11 +4458,10 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { bh = path_leaf_bh(right_path); eb = (struct ocfs2_extent_block *)bh->b_data; - ocfs2_error(sb, - "Extent block #%llu has an invalid l_next_free_rec of %d\n", - (unsigned long long)le64_to_cpu(eb->h_blkno), - le16_to_cpu(new_el->l_next_free_rec)); - status = -EINVAL; + status = ocfs2_error(sb, + "Extent block #%llu has an invalid l_next_free_rec of %d\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec)); goto free_right_path; } rec = &new_el->l_recs[1]; @@ -5523,10 +5514,8 @@ static int ocfs2_truncate_rec(handle_t *handle, ocfs2_journal_dirty(handle, path_leaf_bh(path)); ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); - if (ret) { + if (ret) mlog_errno(ret); - goto out; - } out: ocfs2_free_path(left_path); @@ -5659,10 +5648,8 @@ int ocfs2_remove_extent(handle_t *handle, ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, cpos, len); - if (ret) { + if (ret) mlog_errno(ret); - goto out; - } } out: @@ -5707,7 +5694,6 @@ static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode, if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); - goto out; } } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ea8c551bcd7e..9b2ed62dd638 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -127,13 +127,13 @@ enum o2hb_heartbeat_modes { O2HB_HEARTBEAT_NUM_MODES, }; -char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { - "local", /* O2HB_HEARTBEAT_LOCAL */ - "global", /* O2HB_HEARTBEAT_GLOBAL */ +static const char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { + "local", /* O2HB_HEARTBEAT_LOCAL */ + "global", /* O2HB_HEARTBEAT_GLOBAL */ }; unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; -unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; +static unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; /* * o2hb_dependent_users tracks the number of registered callbacks that depend @@ -141,7 +141,7 @@ unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; * However only o2dlm depends on the heartbeat. It does not want the heartbeat * to stop while a dlm domain is still active. */ -unsigned int o2hb_dependent_users; +static unsigned int o2hb_dependent_users; /* * In global heartbeat mode, all regions are pinned if there are one or more @@ -2486,7 +2486,7 @@ unlock: return ret; } -void o2hb_region_dec_user(const char *region_uuid) +static void o2hb_region_dec_user(const char *region_uuid) { spin_lock(&o2hb_live_lock); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index da64c3a20eeb..0e4166cc23a0 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -35,9 +35,9 @@ * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; -char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { - "reset", /* O2NM_FENCE_RESET */ - "panic", /* O2NM_FENCE_PANIC */ +static const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { + "reset", /* O2NM_FENCE_RESET */ + "panic", /* O2NM_FENCE_PANIC */ }; static inline void o2nm_lock_subsystem(void); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1296f78ae966..7d9eea7d4a87 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -872,8 +872,6 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, "for type %u key %08x\n", msg_type, key); } write_unlock(&o2net_handler_lock); - if (ret) - goto out; out: if (ret) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0ff424c6d17c..8e712b614e6e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,7 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ -struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ddc3e9470c87..79279240fb6e 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -637,10 +637,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, handle = NULL; status = ocfs2_commit_truncate(osb, inode, fe_bh); - if (status < 0) { + if (status < 0) mlog_errno(status); - goto out; - } } out: @@ -1499,7 +1497,6 @@ static int ocfs2_filecheck_validate_inode_block(struct super_block *sb, (unsigned long long)bh->b_blocknr, le32_to_cpu(di->i_fs_generation)); rc = -OCFS2_FILECHECK_ERR_GENERATION; - goto bail; } bail: diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index fe0d1f9571bb..7642b6712c39 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -663,11 +663,10 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, #ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { - ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", - (unsigned long long)le64_to_cpu(alloc->i_blkno), - le32_to_cpu(alloc->id1.bitmap1.i_used), - ocfs2_local_alloc_count_bits(alloc)); - status = -EIO; + status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", + (unsigned long long)le64_to_cpu(alloc->i_blkno), + le32_to_cpu(alloc->id1.bitmap1.i_used), + ocfs2_local_alloc_count_bits(alloc)); goto bail; } #endif diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 16c42ed0dca8..b1a8b046f4c2 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -137,14 +137,13 @@ static int ocfs2_read_quota_block(struct inode *inode, u64 v_block, int rc = 0; struct buffer_head *tmp = *bh; - if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { - ocfs2_error(inode->i_sb, - "Quota file %llu is probably corrupted! Requested to read block %Lu but file has size only %Lu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)v_block, - (unsigned long long)i_size_read(inode)); - return -EIO; - } + if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) + return ocfs2_error(inode->i_sb, + "Quota file %llu is probably corrupted! Requested to read block %Lu but file has size only %Lu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_block, + (unsigned long long)i_size_read(inode)); + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, ocfs2_validate_quota_block); if (rc) diff --git a/fs/seq_file.c b/fs/seq_file.c index 4cc090b50cc5..1dea7a8a5255 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -90,23 +90,22 @@ EXPORT_SYMBOL(seq_open); static int traverse(struct seq_file *m, loff_t offset) { - loff_t pos = 0, index; + loff_t pos = 0; int error = 0; void *p; m->version = 0; - index = 0; + m->index = 0; m->count = m->from = 0; - if (!offset) { - m->index = index; + if (!offset) return 0; - } + if (!m->buf) { m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) return -ENOMEM; } - p = m->op->start(m, &index); + p = m->op->start(m, &m->index); while (p) { error = PTR_ERR(p); if (IS_ERR(p)) @@ -123,20 +122,15 @@ static int traverse(struct seq_file *m, loff_t offset) if (pos + m->count > offset) { m->from = offset - pos; m->count -= m->from; - m->index = index; break; } pos += m->count; m->count = 0; - if (pos == offset) { - index++; - m->index = index; + p = m->op->next(m, p, &m->index); + if (pos == offset) break; - } - p = m->op->next(m, p, &index); } m->op->stop(m, p); - m->index = index; return error; Eoverflow: @@ -160,7 +154,6 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *m = file->private_data; size_t copied = 0; - loff_t pos; size_t n; void *p; int err = 0; @@ -223,16 +216,12 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) size -= n; buf += n; copied += n; - if (!m->count) { - m->from = 0; - m->index++; - } if (!size) goto Done; } /* we need at least one record in buffer */ - pos = m->index; - p = m->op->start(m, &pos); + m->from = 0; + p = m->op->start(m, &m->index); while (1) { err = PTR_ERR(p); if (!p || IS_ERR(p)) @@ -243,8 +232,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (unlikely(err)) m->count = 0; if (unlikely(!m->count)) { - p = m->op->next(m, p, &pos); - m->index = pos; + p = m->op->next(m, p, &m->index); continue; } if (m->count < m->size) @@ -256,29 +244,33 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (!m->buf) goto Enomem; m->version = 0; - pos = m->index; - p = m->op->start(m, &pos); + p = m->op->start(m, &m->index); } m->op->stop(m, p); m->count = 0; goto Done; Fill: /* they want more? let's try to get some more */ - while (m->count < size) { + while (1) { size_t offs = m->count; - loff_t next = pos; - p = m->op->next(m, p, &next); + loff_t pos = m->index; + + p = m->op->next(m, p, &m->index); + if (pos == m->index) + /* Buggy ->next function */ + m->index++; if (!p || IS_ERR(p)) { err = PTR_ERR(p); break; } + if (m->count >= size) + break; err = m->op->show(m, p); if (seq_has_overflowed(m) || err) { m->count = offs; if (likely(err <= 0)) break; } - pos = next; } m->op->stop(m, p); n = min(m->count, size); @@ -287,11 +279,7 @@ Fill: goto Efault; copied += n; m->count -= n; - if (m->count) - m->from = n; - else - pos++; - m->index = pos; + m->from = n; Done: if (!copied) copied = err; diff --git a/fs/super.c b/fs/super.c index 50728d9c1a05..7429588d6b49 100644 --- a/fs/super.c +++ b/fs/super.c @@ -144,6 +144,9 @@ static unsigned long super_cache_count(struct shrinker *shrink, total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc); + if (!total_objects) + return SHRINK_EMPTY; + total_objects = vfs_pressure_ratio(total_objects); return total_objects; } @@ -244,10 +247,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, INIT_LIST_HEAD(&s->s_inodes_wb); spin_lock_init(&s->s_inode_wblist_lock); - if (list_lru_init_memcg(&s->s_dentry_lru)) - goto fail; - if (list_lru_init_memcg(&s->s_inode_lru)) - goto fail; s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); @@ -265,6 +264,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; if (prealloc_shrinker(&s->s_shrink)) goto fail; + if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink)) + goto fail; + if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) + goto fail; return s; fail: diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index e727ee07dbe4..075d3d9114c8 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -547,7 +547,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, /* * Block can be extended */ - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); for (i = newcount; i < (uspi->s_fpb - fragoff); i++) if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) break; @@ -639,7 +639,7 @@ cg_found: if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_alloc_fragments", "internal error, bad magic number on cg %u", cgno); - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); if (count == uspi->s_fpb) { result = ufs_alloccg_block (inode, ucpi, goal, err); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 02c0a4be4212..969fd60436d3 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -89,7 +89,7 @@ void ufs_free_inode (struct inode * inode) if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_free_fragments", "internal error, bad cg magic number"); - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); is_directory = S_ISDIR(inode->i_mode); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 488088141451..a4e07e910f1b 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -698,7 +698,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) usb1 = ubh_get_usb_first(uspi); usb3 = ubh_get_usb_third(uspi); - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + usb1->fs_time = ufs_get_seconds(sb); if ((flags & UFS_ST_MASK) == UFS_ST_SUN || (flags & UFS_ST_MASK) == UFS_ST_SUNOS || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) @@ -1342,7 +1342,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ if (*mount_flags & SB_RDONLY) { ufs_put_super_internal(sb); - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + usb1->fs_time = ufs_get_seconds(sb); if ((flags & UFS_ST_MASK) == UFS_ST_SUN || (flags & UFS_ST_MASK) == UFS_ST_SUNOS || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 1907be6d5808..1fd3011ea623 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -590,3 +590,17 @@ static inline int ufs_is_data_ptr_zero(struct ufs_sb_private_info *uspi, else return *(__fs32 *)p == 0; } + +static inline __fs32 ufs_get_seconds(struct super_block *sbp) +{ + time64_t now = ktime_get_real_seconds(); + + /* Signed 32-bit interpretation wraps around in 2038, which + * happens in ufs1 inode stamps but not ufs2 using 64-bits + * stamps. For superblock and blockgroup, let's assume + * unsigned 32-bit stamps, which are good until y2106. + * Wrap around rather than clamp here to make the dirty + * file system detection work in the superblock stamp. + */ + return cpu_to_fs32(sbp, lower_32_bits(now)); +} diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index bad9cea37f12..15c265d450bf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1849,17 +1849,14 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; wait_queue_entry_t *wq; - struct userfaultfd_wait_queue *uwq; unsigned long pending = 0, total = 0; spin_lock(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { - uwq = container_of(wq, struct userfaultfd_wait_queue, wq); pending++; total++; } list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { - uwq = container_of(wq, struct userfaultfd_wait_queue, wq); total++; } spin_unlock(&ctx->fault_pending_wqh.lock); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 181e9084519b..5eaef2c17293 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1169,7 +1169,7 @@ xfs_file_mmap( file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; return 0; } |