From 83cde9e8ba95d180eaefefe834958fbf7008cf39 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 12 Dec 2014 16:54:21 -0800 Subject: mm: use new helper functions around the i_mmap_mutex Convert all open coded mutex_lock/unlock calls to the i_mmap_[lock/unlock]_write() helpers. Signed-off-by: Davidlohr Bueso Acked-by: Rik van Riel Acked-by: "Kirill A. Shutemov" Acked-by: Hugh Dickins Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Cc: Srikar Dronamraju Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 4 ++-- kernel/fork.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ed8f2cde34c5..aac81bf9df09 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -724,7 +724,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) int more = 0; again: - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; @@ -755,7 +755,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); if (!more) goto out; diff --git a/kernel/fork.c b/kernel/fork.c index 9ca84189cfc2..4dc2ddade9f1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -433,7 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); if (tmp->vm_flags & VM_SHARED) atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); @@ -445,7 +445,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) vma_interval_tree_insert_after(tmp, mpnt, 
&mapping->i_mmap); flush_dcache_mmap_unlock(mapping); - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); } /* -- cgit v1.2.3 From c8c06efa8b552608493b7066c234cfa82c47fcea Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 12 Dec 2014 16:54:24 -0800 Subject: mm: convert i_mmap_mutex to rwsem The i_mmap_mutex is a close cousin of the anon vma lock, both protecting similar data, one for file backed pages and the other for anon memory. To this end, this lock can also be a rwsem. In addition, there are some important opportunities to share the lock when there are no tree modifications. This conversion is straightforward. For now, all users take the write lock. [sfr@canb.auug.org.au: update fremap.c] Signed-off-by: Davidlohr Bueso Reviewed-by: Rik van Riel Acked-by: "Kirill A. Shutemov" Acked-by: Hugh Dickins Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Cc: Srikar Dronamraju Acked-by: Mel Gorman Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 10 +++++----- fs/inode.c | 2 +- include/linux/fs.h | 7 ++++--- include/linux/mmu_notifier.h | 2 +- kernel/events/uprobes.c | 2 +- mm/filemap.c | 10 +++++----- mm/hugetlb.c | 10 +++++----- mm/mmap.c | 8 ++++---- mm/mremap.c | 2 +- mm/rmap.c | 6 +++--- 10 files changed, 30 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a082709aa427..5eba47f593f8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -472,12 +472,12 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, } /* - * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never + * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never * be taken from reclaim -- unlike regular filesystems. This needs an * annotation because huge_pmd_share() does an allocation under - * i_mmap_mutex. + * i_mmap_rwsem. 
*/ -static struct lock_class_key hugetlbfs_i_mmap_mutex_key; +static struct lock_class_key hugetlbfs_i_mmap_rwsem_key; static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct inode *dir, @@ -495,8 +495,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); - lockdep_set_class(&inode->i_mapping->i_mmap_mutex, - &hugetlbfs_i_mmap_mutex_key); + lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, + &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/inode.c b/fs/inode.c index 2ed95f7caa4f..ad60555b4768 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -346,7 +346,7 @@ void address_space_init_once(struct address_space *mapping) memset(mapping, 0, sizeof(*mapping)); INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); spin_lock_init(&mapping->tree_lock); - mutex_init(&mapping->i_mmap_mutex); + init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); mapping->i_mmap = RB_ROOT; diff --git a/include/linux/fs.h b/include/linux/fs.h index bd0a1b2f3c02..6abcd0b72ae0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -401,7 +402,7 @@ struct address_space { atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ - struct mutex i_mmap_mutex; /* protect tree, count, list */ + struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ unsigned long nrshadows; /* number of shadow entries */ @@ -469,12 +470,12 
@@ int mapping_tagged(struct address_space *mapping, int tag); static inline void i_mmap_lock_write(struct address_space *mapping) { - mutex_lock(&mapping->i_mmap_mutex); + down_write(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_write(struct address_space *mapping) { - mutex_unlock(&mapping->i_mmap_mutex); + up_write(&mapping->i_mmap_rwsem); } /* diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 88787bb4b3b9..ab8564b03468 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -154,7 +154,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem). + * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index aac81bf9df09..1901dbfa7ce0 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -731,7 +731,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) if (!prev && !more) { /* - * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through + * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through * reclaim. This is optimistic, no harm done if it fails. 
*/ prev = kmalloc(sizeof(struct map_info), diff --git a/mm/filemap.c b/mm/filemap.c index 14b4642279f1..e8905bc3cbd7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -62,16 +62,16 @@ /* * Lock ordering: * - * ->i_mmap_mutex (truncate_pagecache) + * ->i_mmap_rwsem (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock * * ->i_mutex - * ->i_mmap_mutex (truncate->unmap_mapping_range) + * ->i_mmap_rwsem (truncate->unmap_mapping_range) * * ->mmap_sem - * ->i_mmap_mutex + * ->i_mmap_rwsem * ->page_table_lock or pte_lock (various, mainly in memory.c) * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) * @@ -85,7 +85,7 @@ * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * - * ->i_mmap_mutex + * ->i_mmap_rwsem * ->anon_vma.lock (vma_adjust) * * ->anon_vma.lock @@ -105,7 +105,7 @@ * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * - * ->i_mmap_mutex + * ->i_mmap_rwsem * ->tasklist_lock (memory_failure, collect_procs_ao) */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ffe19304cc09..989cb032eaf5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2726,9 +2726,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, * on its way out. We're lucky that the flag has such an appropriate * name, and can in fact be safely cleared here. We could clear it * before the __unmap_hugepage_range above, but all that's necessary - * is to clear it before releasing the i_mmap_mutex. This works + * is to clear it before releasing the i_mmap_rwsem. This works * because in the context this is called, the VMA is about to be - * destroyed and the i_mmap_mutex is held. + * destroyed and the i_mmap_rwsem is held. 
*/ vma->vm_flags &= ~VM_MAYSHARE; } @@ -3370,9 +3370,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, spin_unlock(ptl); } /* - * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare + * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: - * once we release i_mmap_mutex, another task can do the final put_page + * once we release i_mmap_rwsem, another task can do the final put_page * and that page table be reused and filled with junk. */ flush_tlb_range(vma, start, end); @@ -3525,7 +3525,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) * and returns the corresponding pte. While this is not necessary for the * !shared pmd case because we can allocate the pmd later as well, it makes the * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise + * pud has to be populated inside the same i_mmap_rwsem section - otherwise * racing tasks could either miss the sharing (see huge_pte_offset) or select a * bad pmd for sharing. */ diff --git a/mm/mmap.c b/mm/mmap.c index ecd6ecf48778..0d84b2f86f3b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -232,7 +232,7 @@ error: } /* - * Requires inode->i_mapping->i_mmap_mutex + * Requires inode->i_mapping->i_mmap_rwsem */ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) @@ -2791,7 +2791,7 @@ void exit_mmap(struct mm_struct *mm) /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL - * then i_mmap_mutex is taken here. + * then i_mmap_rwsem is taken here. 
*/ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { @@ -3086,7 +3086,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); - mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem); + down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem); } } @@ -3113,7 +3113,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code - * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never + * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * diff --git a/mm/mremap.c b/mm/mremap.c index 426b448d6447..84aa36f9f308 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -99,7 +99,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, spinlock_t *old_ptl, *new_ptl; /* - * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma + * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma * locks to ensure that rmap will always observe either the old or the * new ptes. This is the easiest way to avoid races with * truncate_pagecache(), page migration, etc... diff --git a/mm/rmap.c b/mm/rmap.c index bea03f6bec61..18247f89f1a8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -23,7 +23,7 @@ * inode->i_mutex (while writing or truncating, not reading or faulting) * mm->mmap_sem * page->flags PG_locked (lock_page) - * mapping->i_mmap_mutex + * mapping->i_mmap_rwsem * anon_vma->rwsem * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) @@ -1260,7 +1260,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. 
Plus, We can't wait here because - * we now hold anon_vma->rwsem or mapping->i_mmap_mutex. + * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. @@ -1684,7 +1684,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) * The page lock not only makes sure that page->mapping cannot * suddenly be NULLified by truncation, it makes sure that the * structure at mapping cannot be freed and reused yet, - * so we can safely take mapping->i_mmap_mutex. + * so we can safely take mapping->i_mmap_rwsem. */ VM_BUG_ON_PAGE(!PageLocked(page), page); -- cgit v1.2.3 From 4a23717a236b2ab31efb1651f586126789fc997f Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 12 Dec 2014 16:54:30 -0800 Subject: uprobes: share the i_mmap_rwsem Both register and unregister call build_map_info() in order to create the list of mappings before installing or removing breakpoints for every mm which maps file backed memory. As such, there is no reason to hold the i_mmap_rwsem exclusively, so share it and allow concurrent readers to build the mapping data. Signed-off-by: Davidlohr Bueso Acked-by: Srikar Dronamraju Acked-by: "Kirill A. 
Shutemov" Cc: Oleg Nesterov Acked-by: Hugh Dickins Acked-by: Peter Zijlstra (Intel) Cc: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1901dbfa7ce0..995a95f61a19 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -724,7 +724,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) int more = 0; again: - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; @@ -755,7 +755,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); if (!more) goto out; -- cgit v1.2.3 From 9a92a6ce6f842713ccd0025c5228fe8bea61234c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 Dec 2014 16:55:58 -0800 Subject: stacktrace: introduce snprint_stack_trace for buffer output Current stacktrace only has a function for console output. page_owner that will be introduced in a following patch needs to print the output of stacktrace into the buffer for our own output format so a new function, snprint_stack_trace(), is needed. 
Signed-off-by: Joonsoo Kim Cc: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Dave Hansen Cc: Michal Nazarewicz Cc: Jungsoo Son Cc: Ingo Molnar Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stacktrace.h | 5 +++++ kernel/stacktrace.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'kernel') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 115b570e3bff..669045ab73f3 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -1,6 +1,8 @@ #ifndef __LINUX_STACKTRACE_H #define __LINUX_STACKTRACE_H +#include + struct task_struct; struct pt_regs; @@ -20,6 +22,8 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); +extern int snprint_stack_trace(char *buf, size_t size, + struct stack_trace *trace, int spaces); #ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); @@ -32,6 +36,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); # define save_stack_trace_tsk(tsk, trace) do { } while (0) # define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) +# define snprint_stack_trace(buf, size, trace, spaces) do { } while (0) #endif #endif diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 00fe55cc5a82..b6e4c16377c7 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -25,6 +25,38 @@ void print_stack_trace(struct stack_trace *trace, int spaces) } EXPORT_SYMBOL_GPL(print_stack_trace); +int snprint_stack_trace(char *buf, size_t size, + struct stack_trace *trace, int spaces) +{ + int i; + unsigned long ip; + int generated; + int total = 0; + + if (WARN_ON(!trace->entries)) + return 0; + + for (i = 0; i < trace->nr_entries; i++) { + ip = trace->entries[i]; + generated = snprintf(buf, size, "%*c[<%p>] %pS\n", + 1 + spaces, ' 
', (void *) ip, (void *) ip); + + total += generated; + + /* Assume that generated isn't a negative number */ + if (generated >= size) { + buf += size; + size = 0; + } else { + buf += generated; + size -= generated; + } + } + + return total; +} +EXPORT_SYMBOL_GPL(snprint_stack_trace); + /* * Architectures that do not implement save_stack_trace_tsk or * save_stack_trace_regs get this weak alias and a once-per-bootup warning -- cgit v1.2.3 From 51f39a1f0cea1cacf8c787f652f26dfee9611874 Mon Sep 17 00:00:00 2001 From: David Drysdale Date: Fri, 12 Dec 2014 16:57:29 -0800 Subject: syscalls: implement execveat() system call This patchset adds execveat(2) for x86, and is derived from Meredydd Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528). The primary aim of adding an execveat syscall is to allow an implementation of fexecve(3) that does not rely on the /proc filesystem, at least for executables (rather than scripts). The current glibc version of fexecve(3) is implemented via /proc, which causes problems in sandboxed or otherwise restricted environments. Given the desire for a /proc-free fexecve() implementation, HPA suggested (https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be an appropriate generalization. Also, having a new syscall means that it can take a flags argument without back-compatibility concerns. The current implementation just defines the AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be added in future -- for example, flags for new namespaces (as suggested at https://lkml.org/lkml/2006/7/11/474). Related history: - https://lkml.org/lkml/2006/12/27/123 is an example of someone realizing that fexecve() is likely to fail in a chroot environment. - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered documenting the /proc requirement of fexecve(3) in its manpage, to "prevent other people from wasting their time". 
- https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a problem where a process that did setuid() could not fexecve() because it no longer had access to /proc/self/fd; this has since been fixed. This patch (of 4): Add a new execveat(2) system call. execveat() is to execve() as openat() is to open(): it takes a file descriptor that refers to a directory, and resolves the filename relative to that. In addition, if the filename is empty and AT_EMPTY_PATH is specified, execveat() executes the file to which the file descriptor refers. This replicates the functionality of fexecve(), which is a system call in other UNIXen, but in Linux glibc it depends on opening "/proc/self/fd/<fd>" (and so relies on /proc being mounted). The filename fed to the executed program as argv[0] (or the name of the script fed to a script interpreter) will be of the form "/dev/fd/<fd>" (for an empty filename) or "/dev/fd/<fd>/<filename>", effectively reflecting how the executable was found. This does however mean that execution of a script in a /proc-less environment won't work; also, script execution via an O_CLOEXEC file descriptor fails (as the file will not be accessible after exec). Based on patches by Meredydd Luff. Signed-off-by: David Drysdale Cc: Meredydd Luff Cc: Shuah Khan Cc: "Eric W. Biederman" Cc: Andy Lutomirski Cc: Alexander Viro Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Kees Cook Cc: Arnd Bergmann Cc: Rich Felker Cc: Christoph Hellwig Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_em86.c | 4 ++ fs/binfmt_misc.c | 4 ++ fs/binfmt_script.c | 10 ++++ fs/exec.c | 113 +++++++++++++++++++++++++++++++++----- fs/namei.c | 2 +- include/linux/binfmts.h | 4 ++ include/linux/compat.h | 3 + include/linux/fs.h | 1 + include/linux/sched.h | 4 ++ include/linux/syscalls.h | 5 ++ include/uapi/asm-generic/unistd.h | 4 +- kernel/sys_ni.c | 3 + lib/audit.c | 3 + 13 files changed, 145 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f37b08cea1f7..490538536cb4 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm) return -ENOEXEC; } + /* Need to be able to load the file after exec */ + if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) + return -ENOENT; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 70789e198dea..c04ef1d4f18a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -144,6 +144,10 @@ static int load_misc_binary(struct linux_binprm *bprm) if (!fmt) goto ret; + /* Need to be able to load the file after exec */ + if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) + return -ENOENT; + if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { retval = remove_arg_zero(bprm); if (retval) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 5027a3e14922..afdf4e3cafc2 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm) if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; + + /* + * If the script filename will be inaccessible after exec, typically + * because it is a "/dev/fd//.." 
path against an O_CLOEXEC fd, give + * up now (on the assumption that the interpreter will want to load + * this file). + */ + if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) + return -ENOENT; + /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. -TYT diff --git a/fs/exec.c b/fs/exec.c index 01aebe300200..ad8798e26be9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -748,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages); #endif /* CONFIG_MMU */ -static struct file *do_open_exec(struct filename *name) +static struct file *do_open_execat(int fd, struct filename *name, int flags) { struct file *file; int err; - static const struct open_flags open_exec_flags = { + struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC | MAY_OPEN, .intent = LOOKUP_OPEN, .lookup_flags = LOOKUP_FOLLOW, }; - file = do_filp_open(AT_FDCWD, name, &open_exec_flags); + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) + return ERR_PTR(-EINVAL); + if (flags & AT_SYMLINK_NOFOLLOW) + open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + open_exec_flags.lookup_flags |= LOOKUP_EMPTY; + + file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) goto out; @@ -770,12 +777,13 @@ static struct file *do_open_exec(struct filename *name) if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - fsnotify_open(file); - err = deny_write_access(file); if (err) goto exit; + if (name->name[0] != '\0') + fsnotify_open(file); + out: return file; @@ -787,7 +795,7 @@ exit: struct file *open_exec(const char *name) { struct filename tmp = { .name = name }; - return do_open_exec(&tmp); + return do_open_execat(AT_FDCWD, &tmp, 0); } EXPORT_SYMBOL(open_exec); @@ -1428,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. 
*/ -static int do_execve_common(struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp) +static int do_execveat_common(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags) { + char *pathbuf = NULL; struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; @@ -1472,7 +1482,7 @@ static int do_execve_common(struct filename *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = do_open_exec(filename); + file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1480,7 +1490,28 @@ static int do_execve_common(struct filename *filename, sched_exec(); bprm->file = file; - bprm->filename = bprm->interp = filename->name; + if (fd == AT_FDCWD || filename->name[0] == '/') { + bprm->filename = filename->name; + } else { + if (filename->name[0] == '\0') + pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd); + else + pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s", + fd, filename->name); + if (!pathbuf) { + retval = -ENOMEM; + goto out_unmark; + } + /* + * Record that a name derived from an O_CLOEXEC fd will be + * inaccessible after exec. Relies on having exclusive access to + * current->files (due to unshare_files above). 
+ */ + if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) + bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; + bprm->filename = pathbuf; + } + bprm->interp = bprm->filename; retval = bprm_mm_init(bprm); if (retval) @@ -1521,6 +1552,7 @@ static int do_execve_common(struct filename *filename, acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); + kfree(pathbuf); putname(filename); if (displaced) put_files_struct(displaced); @@ -1538,6 +1570,7 @@ out_unmark: out_free: free_bprm(bprm); + kfree(pathbuf); out_files: if (displaced) @@ -1553,7 +1586,18 @@ int do_execve(struct filename *filename, { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; - return do_execve_common(filename, argv, envp); + return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); +} + +int do_execveat(int fd, struct filename *filename, + const char __user *const __user *__argv, + const char __user *const __user *__envp, + int flags) +{ + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + + return do_execveat_common(fd, filename, argv, envp, flags); } #ifdef CONFIG_COMPAT @@ -1569,7 +1613,23 @@ static int compat_do_execve(struct filename *filename, .is_compat = true, .ptr.compat = __envp, }; - return do_execve_common(filename, argv, envp); + return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); +} + +static int compat_do_execveat(int fd, struct filename *filename, + const compat_uptr_t __user *__argv, + const compat_uptr_t __user *__envp, + int flags) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; + return do_execveat_common(fd, filename, argv, envp, flags); } #endif @@ -1609,6 +1669,20 @@ SYSCALL_DEFINE3(execve, { return do_execve(getname(filename), argv, envp); } + +SYSCALL_DEFINE5(execveat, + int, fd, const char 
__user *, filename, + const char __user *const __user *, argv, + const char __user *const __user *, envp, + int, flags) +{ + int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; + + return do_execveat(fd, + getname_flags(filename, lookup_flags, NULL), + argv, envp, flags); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, const compat_uptr_t __user *, argv, @@ -1616,4 +1690,17 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, { return compat_do_execve(getname(filename), argv, envp); } + +COMPAT_SYSCALL_DEFINE5(execveat, int, fd, + const char __user *, filename, + const compat_uptr_t __user *, argv, + const compat_uptr_t __user *, envp, + int, flags) +{ + int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; + + return compat_do_execveat(fd, + getname_flags(filename, lookup_flags, NULL), + argv, envp, flags); +} #endif diff --git a/fs/namei.c b/fs/namei.c index db5fe86319e6..ca814165d84c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -130,7 +130,7 @@ void final_putname(struct filename *name) #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) -static struct filename * +struct filename * getname_flags(const char __user *filename, int flags, int *empty) { struct filename *result, *err; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 61f29e5ea840..576e4639ca60 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -53,6 +53,10 @@ struct linux_binprm { #define BINPRM_FLAGS_EXECFD_BIT 1 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT) +/* filename of the binary will be inaccessible after exec */ +#define BINPRM_FLAGS_PATH_INACCESSIBLE_BIT 2 +#define BINPRM_FLAGS_PATH_INACCESSIBLE (1 << BINPRM_FLAGS_PATH_INACCESSIBLE_BIT) + /* Function parameter for binfmt->coredump */ struct coredump_params { const siginfo_t *siginfo; diff --git a/include/linux/compat.h b/include/linux/compat.h index e6494261eaff..7450ca2ac1fc 100644 --- a/include/linux/compat.h 
+++ b/include/linux/compat.h @@ -357,6 +357,9 @@ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp); +asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, int flags); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d1838de6882..4193a0bd99b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2096,6 +2096,7 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); +extern struct filename *getname_flags(const char __user *, int, int *); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cfdbcf8cf56..8db31ef98d2f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2485,6 +2485,10 @@ extern void do_group_exit(int); extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c9afdc7a7f84..85893d744901 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -877,4 +877,9 @@ asmlinkage long sys_seccomp(unsigned int op, 
unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); + +asmlinkage long sys_execveat(int dfd, const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 22749c134117..e016bd9b1a04 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -707,9 +707,11 @@ __SYSCALL(__NR_getrandom, sys_getrandom) __SYSCALL(__NR_memfd_create, sys_memfd_create) #define __NR_bpf 280 __SYSCALL(__NR_bpf, sys_bpf) +#define __NR_execveat 281 +__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) #undef __NR_syscalls -#define __NR_syscalls 281 +#define __NR_syscalls 282 /* * All syscalls below here should go away really, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 61eea02b53f5..5adcb0ae3a58 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -226,3 +226,6 @@ cond_syscall(sys_seccomp); /* access BPF programs and maps */ cond_syscall(sys_bpf); + +/* execveat */ +cond_syscall(sys_execveat); diff --git a/lib/audit.c b/lib/audit.c index 1d726a22565b..b8fb5ee81e26 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -53,6 +53,9 @@ int audit_classify_syscall(int abi, unsigned syscall) #ifdef __NR_socketcall case __NR_socketcall: return 4; +#endif +#ifdef __NR_execveat + case __NR_execveat: #endif case __NR_execve: return 5; -- cgit v1.2.3 From d5393955c33e09c72695acabfcecf00ef6d289b0 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 12 Dec 2014 16:57:41 -0800 Subject: kexec: remove unnecessary KERN_ERR from kexec.c Remove unnecessary KERN_ERR from pr_err() within kexec.c. 
Signed-off-by: Masanari Iida Acked-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index 2abf9f6e9a61..9a8a01abbaed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -600,7 +600,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, if (!kexec_on_panic) { image->swap_page = kimage_alloc_control_pages(image, 0); if (!image->swap_page) { - pr_err(KERN_ERR "Could not allocate swap buffer\n"); + pr_err("Could not allocate swap buffer\n"); goto out_free_control_pages; } } -- cgit v1.2.3 From 957e3facd147510f2cf8780e38606f1d707f0e33 Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Fri, 12 Dec 2014 16:57:44 -0800 Subject: gcov: enable GCOV_PROFILE_ALL from ARCH Kconfigs Following the suggestions from Andrew Morton and Stephen Rothwell, don't expand the ARCH list in kernel/gcov/Kconfig. Instead, define an ARCH_HAS_GCOV_PROFILE_ALL bool which architectures can enable. Set ARCH_HAS_GCOV_PROFILE_ALL on architectures where it was previously allowed + ARM64 which I tested.
Signed-off-by: Riku Voipio Cc: Peter Oberparleiter Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/microblaze/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/x86/Kconfig | 1 + kernel/gcov/Kconfig | 5 ++++- 8 files changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8424a85bc04..2160091d9c15 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6b1ebd964c10..688db03ef5b8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2,6 +2,7 @@ config ARM64 def_bool y select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index a7736fa0580c..0bce820428fc 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -1,5 +1,6 @@ config MICROBLAZE def_bool y + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index af696874248b..a2a168e2dfe7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,6 +129,7 @@ config PPC select HAVE_BPF_JIT if PPC64 select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select 
GENERIC_TIME_VSYSCALL_OLD diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f2cf1f90295b..68b68d755fdf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -65,6 +65,7 @@ config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index a1403470f80e..c6b6ee5f38b2 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,6 +16,7 @@ config SUPERH select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) + select ARCH_HAS_GCOV_PROFILE_ALL select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK select HAVE_KERNEL_GZIP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bea3a0159496..d69f1cd87fd9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,6 +24,7 @@ config X86 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_AOUT if X86_32 diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 3b7408759bdf..c92e44855ddd 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -32,10 +32,13 @@ config GCOV_KERNEL Note that the debugfs filesystem has to be mounted to access profiling data. +config ARCH_HAS_GCOV_PROFILE_ALL + def_bool n + config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64 + depends on ARCH_HAS_GCOV_PROFILE_ALL default n ---help--- This options activates profiling for the entire kernel. 
-- cgit v1.2.3 From 0809ab69a2782afac8c4d7f3d35cd123050aab9a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 Dec 2014 16:58:36 -0800 Subject: fsnotify: unify inode and mount marks handling There's a lot of common code in inode and mount marks handling. Factor it out to a common helper function. Signed-off-by: Jan Kara Cc: Eric Paris Cc: Heinrich Schuchardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/dnotify/dnotify.c | 4 +- fs/notify/fdinfo.c | 6 +- fs/notify/fsnotify.c | 4 +- fs/notify/fsnotify.h | 12 ++++ fs/notify/inode_mark.c | 113 ++++++----------------------------- fs/notify/inotify/inotify_fsnotify.c | 2 +- fs/notify/inotify/inotify_user.c | 10 ++-- fs/notify/mark.c | 89 ++++++++++++++++++++++++++- fs/notify/vfsmount_mark.c | 109 ++++++--------------------------- include/linux/fsnotify_backend.h | 24 ++------ kernel/audit_tree.c | 16 ++--- 11 files changed, 160 insertions(+), 229 deletions(-) (limited to 'kernel') diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index caaaf9dfe353..44523f4a6084 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -69,8 +69,8 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) if (old_mask == new_mask) return; - if (fsn_mark->i.inode) - fsnotify_recalc_inode_mask(fsn_mark->i.inode); + if (fsn_mark->inode) + fsnotify_recalc_inode_mask(fsn_mark->inode); } /* diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 6ffd220eb14d..58b7cdb63da9 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -80,7 +80,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->i.inode); + inode = igrab(mark->inode); if (inode) { seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, @@ -112,7 +112,7 @@ static void fanotify_fdinfo(struct seq_file *m, 
struct fsnotify_mark *mark) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = igrab(mark->i.inode); + inode = igrab(mark->inode); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -122,7 +122,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_putc(m, '\n'); iput(inode); } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { - struct mount *mnt = real_mount(mark->m.mnt); + struct mount *mnt = real_mount(mark->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 41e39102743a..dd3fb0b17be7 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -242,13 +242,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), - struct fsnotify_mark, i.i_list); + struct fsnotify_mark, obj_list); inode_group = inode_mark->group; } if (vfsmount_node) { vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), - struct fsnotify_mark, m.m_list); + struct fsnotify_mark, obj_list); vfsmount_group = vfsmount_mark->group; } diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 3b68b0ae0a97..13a00be516d2 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,12 +12,19 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; +/* Calculate mask of events for a list of marks */ +extern u32 fsnotify_recalc_mask(struct hlist_head *head); + /* compare two groups for sorting of marks lists */ extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark 
*fsn_mark, __u32 mask); +/* Add mark to a proper place in mark list */ +extern int fsnotify_add_mark_list(struct hlist_head *head, + struct fsnotify_mark *mark, + int allow_dups); /* add a mark to an inode */ extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, @@ -31,6 +38,11 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); /* inode specific destruction of a mark */ extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); +/* Destroy all marks in the given list */ +extern void fsnotify_destroy_marks(struct list_head *to_free); +/* Find mark belonging to given group in the list of marks */ +extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, + struct fsnotify_group *group); /* run the list of all marks associated with inode and flag them to be freed */ extern void fsnotify_clear_marks_by_inode(struct inode *inode); /* run the list of all marks associated with vfsmount and flag them to be freed */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index dfbf5447eea4..3daf513ee99e 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -30,21 +30,6 @@ #include "../internal.h" -/* - * Recalculate the mask of events relevant to a given inode locked. - */ -static void fsnotify_recalc_inode_mask_locked(struct inode *inode) -{ - struct fsnotify_mark *mark; - __u32 new_mask = 0; - - assert_spin_locked(&inode->i_lock); - - hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) - new_mask |= mark->mask; - inode->i_fsnotify_mask = new_mask; -} - /* * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types * any notifier is interested in hearing for this inode. 
@@ -52,7 +37,7 @@ static void fsnotify_recalc_inode_mask_locked(struct inode *inode) void fsnotify_recalc_inode_mask(struct inode *inode) { spin_lock(&inode->i_lock); - fsnotify_recalc_inode_mask_locked(inode); + inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); spin_unlock(&inode->i_lock); __fsnotify_update_child_dentry_flags(inode); @@ -60,23 +45,22 @@ void fsnotify_recalc_inode_mask(struct inode *inode) void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) { - struct inode *inode = mark->i.inode; + struct inode *inode = mark->inode; BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); spin_lock(&inode->i_lock); - hlist_del_init_rcu(&mark->i.i_list); - mark->i.inode = NULL; + hlist_del_init_rcu(&mark->obj_list); + mark->inode = NULL; /* * this mark is now off the inode->i_fsnotify_marks list and we * hold the inode->i_lock, so this is the perfect time to update the * inode->i_fsnotify_mask */ - fsnotify_recalc_inode_mask_locked(inode); - + inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); spin_unlock(&inode->i_lock); } @@ -85,30 +69,19 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) */ void fsnotify_clear_marks_by_inode(struct inode *inode) { - struct fsnotify_mark *mark, *lmark; + struct fsnotify_mark *mark; struct hlist_node *n; LIST_HEAD(free_list); spin_lock(&inode->i_lock); - hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) { - list_add(&mark->i.free_i_list, &free_list); - hlist_del_init_rcu(&mark->i.i_list); + hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, obj_list) { + list_add(&mark->free_list, &free_list); + hlist_del_init_rcu(&mark->obj_list); fsnotify_get_mark(mark); } spin_unlock(&inode->i_lock); - list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) { - struct fsnotify_group *group; - - spin_lock(&mark->lock); - fsnotify_get_group(mark->group); - group = mark->group; - spin_unlock(&mark->lock); - - 
fsnotify_destroy_mark(mark, group); - fsnotify_put_mark(mark); - fsnotify_put_group(group); - } + fsnotify_destroy_marks(&free_list); } /* @@ -119,27 +92,6 @@ void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); } -/* - * given a group and inode, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -static struct fsnotify_mark *fsnotify_find_inode_mark_locked( - struct fsnotify_group *group, - struct inode *inode) -{ - struct fsnotify_mark *mark; - - assert_spin_locked(&inode->i_lock); - - hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) { - if (mark->group == group) { - fsnotify_get_mark(mark); - return mark; - } - } - return NULL; -} - /* * given a group and inode, find the mark associated with that combination. * if found take a reference to that mark and return it, else return NULL @@ -150,7 +102,7 @@ struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct fsnotify_mark *mark; spin_lock(&inode->i_lock); - mark = fsnotify_find_inode_mark_locked(group, inode); + mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); spin_unlock(&inode->i_lock); return mark; @@ -168,10 +120,10 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, assert_spin_locked(&mark->lock); if (mask && - mark->i.inode && + mark->inode && !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; - inode = igrab(mark->i.inode); + inode = igrab(mark->inode); /* * we shouldn't be able to get here if the inode wasn't * already safely held in memory. 
But bug in case it @@ -192,9 +144,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, int allow_dups) { - struct fsnotify_mark *lmark, *last = NULL; - int ret = 0; - int cmp; + int ret; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -202,37 +152,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, assert_spin_locked(&mark->lock); spin_lock(&inode->i_lock); - - mark->i.inode = inode; - - /* is mark the first mark? */ - if (hlist_empty(&inode->i_fsnotify_marks)) { - hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks); - goto out; - } - - /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) { - last = lmark; - - if ((lmark->group == group) && !allow_dups) { - ret = -EEXIST; - goto out; - } - - cmp = fsnotify_compare_groups(lmark->group, mark->group); - if (cmp < 0) - continue; - - hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); - goto out; - } - - BUG_ON(last == NULL); - /* mark should be the last entry. 
last is the current last entry */ - hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list); -out: - fsnotify_recalc_inode_mask_locked(inode); + mark->inode = inode; + ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark, + allow_dups); + inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); spin_unlock(&inode->i_lock); return ret; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 7d888d77d59a..2cd900c2c737 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -156,7 +156,7 @@ static int idr_callback(int id, void *p, void *data) */ if (fsn_mark) printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", - fsn_mark->group, fsn_mark->i.inode, i_mark->wd); + fsn_mark->group, fsn_mark->inode, i_mark->wd); return 0; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 283aa312d745..450648697433 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -433,7 +433,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, if (wd == -1) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); + i_mark->fsn_mark.group, i_mark->fsn_mark.inode); goto out; } @@ -442,7 +442,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, if (unlikely(!found_i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); + i_mark->fsn_mark.group, i_mark->fsn_mark.inode); goto out; } @@ -456,9 +456,9 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " "found_i_mark->group=%p found_i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, - i_mark->fsn_mark.i.inode, found_i_mark, 
found_i_mark->wd, + i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd, found_i_mark->fsn_mark.group, - found_i_mark->fsn_mark.i.inode); + found_i_mark->fsn_mark.inode); goto out; } @@ -470,7 +470,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); + i_mark->fsn_mark.group, i_mark->fsn_mark.inode); /* we can't really recover with bad ref cnting.. */ BUG(); } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 34c38fabf514..3942d5c9eb8d 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -110,6 +110,17 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) } } +/* Calculate mask of events for a list of marks */ +u32 fsnotify_recalc_mask(struct hlist_head *head) +{ + u32 new_mask = 0; + struct fsnotify_mark *mark; + + hlist_for_each_entry(mark, head, obj_list) + new_mask |= mark->mask; + return new_mask; +} + /* * Any time a mark is getting freed we end up here. * The caller had better be holding a reference to this mark so we don't actually @@ -133,7 +144,7 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = mark->i.inode; + inode = mark->inode; fsnotify_destroy_inode_mark(mark); } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) fsnotify_destroy_vfsmount_mark(mark); @@ -192,6 +203,27 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, mutex_unlock(&group->mark_mutex); } +/* + * Destroy all marks in the given list. The marks must be already detached from + * the original inode / vfsmount. 
+ */ +void fsnotify_destroy_marks(struct list_head *to_free) +{ + struct fsnotify_mark *mark, *lmark; + struct fsnotify_group *group; + + list_for_each_entry_safe(mark, lmark, to_free, free_list) { + spin_lock(&mark->lock); + fsnotify_get_group(mark->group); + group = mark->group; + spin_unlock(&mark->lock); + + fsnotify_destroy_mark(mark, group); + fsnotify_put_mark(mark); + fsnotify_put_group(group); + } +} + void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) { assert_spin_locked(&mark->lock); @@ -245,6 +277,39 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +/* Add mark into proper place in given list of marks */ +int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, + int allow_dups) +{ + struct fsnotify_mark *lmark, *last = NULL; + int cmp; + + /* is mark the first mark? */ + if (hlist_empty(head)) { + hlist_add_head_rcu(&mark->obj_list, head); + return 0; + } + + /* should mark be in the middle of the current list? */ + hlist_for_each_entry(lmark, head, obj_list) { + last = lmark; + + if ((lmark->group == mark->group) && !allow_dups) + return -EEXIST; + + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp >= 0) { + hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); + return 0; + } + } + + BUG_ON(last == NULL); + /* mark should be the last entry. last is the current last entry */ + hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); + return 0; +} + /* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which @@ -322,6 +387,24 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, return ret; } +/* + * Given a list of marks, find the mark associated with given group. If found + * take a reference to that mark and return it, else return NULL. 
+ */ +struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, + struct fsnotify_group *group) +{ + struct fsnotify_mark *mark; + + hlist_for_each_entry(mark, head, obj_list) { + if (mark->group == group) { + fsnotify_get_mark(mark); + return mark; + } + } + return NULL; +} + /* * clear any marks in a group in which mark->flags & flags is true */ @@ -352,8 +435,8 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group) void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) { assert_spin_locked(&old->lock); - new->i.inode = old->i.inode; - new->m.mnt = old->m.mnt; + new->inode = old->inode; + new->mnt = old->mnt; if (old->group) fsnotify_get_group(old->group); new->group = old->group; diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index faefa72a11eb..326b148e623c 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -32,31 +32,20 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { - struct fsnotify_mark *mark, *lmark; + struct fsnotify_mark *mark; struct hlist_node *n; struct mount *m = real_mount(mnt); LIST_HEAD(free_list); spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) { - list_add(&mark->m.free_m_list, &free_list); - hlist_del_init_rcu(&mark->m.m_list); + hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, obj_list) { + list_add(&mark->free_list, &free_list); + hlist_del_init_rcu(&mark->obj_list); fsnotify_get_mark(mark); } spin_unlock(&mnt->mnt_root->d_lock); - list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) { - struct fsnotify_group *group; - - spin_lock(&mark->lock); - fsnotify_get_group(mark->group); - group = mark->group; - spin_unlock(&mark->lock); - - fsnotify_destroy_mark(mark, group); - fsnotify_put_mark(mark); - fsnotify_put_group(group); - } + fsnotify_destroy_marks(&free_list); } void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) @@ -64,67 +53,36 @@ 
void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); } -/* - * Recalculate the mask of events relevant to a given vfsmount locked. - */ -static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - __u32 new_mask = 0; - - assert_spin_locked(&mnt->mnt_root->d_lock); - - hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) - new_mask |= mark->mask; - m->mnt_fsnotify_mask = new_mask; -} - /* * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types * any notifier is interested in hearing for this mount point */ void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) { + struct mount *m = real_mount(mnt); + spin_lock(&mnt->mnt_root->d_lock); - fsnotify_recalc_vfsmount_mask_locked(mnt); + m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); spin_unlock(&mnt->mnt_root->d_lock); } void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) { - struct vfsmount *mnt = mark->m.mnt; + struct vfsmount *mnt = mark->mnt; + struct mount *m = real_mount(mnt); BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); spin_lock(&mnt->mnt_root->d_lock); - hlist_del_init_rcu(&mark->m.m_list); - mark->m.mnt = NULL; - - fsnotify_recalc_vfsmount_mask_locked(mnt); + hlist_del_init_rcu(&mark->obj_list); + mark->mnt = NULL; + m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); spin_unlock(&mnt->mnt_root->d_lock); } -static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group, - struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - - assert_spin_locked(&mnt->mnt_root->d_lock); - - hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) { - if (mark->group == group) { - fsnotify_get_mark(mark); - return mark; - } - } - return NULL; -} - /* * given a 
group and vfsmount, find the mark associated with that combination. * if found take a reference to that mark and return it, else return NULL @@ -132,10 +90,11 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_ struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt) { + struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; spin_lock(&mnt->mnt_root->d_lock); - mark = fsnotify_find_vfsmount_mark_locked(group, mnt); + mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group); spin_unlock(&mnt->mnt_root->d_lock); return mark; @@ -151,9 +110,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, int allow_dups) { struct mount *m = real_mount(mnt); - struct fsnotify_mark *lmark, *last = NULL; - int ret = 0; - int cmp; + int ret; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -161,37 +118,9 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, assert_spin_locked(&mark->lock); spin_lock(&mnt->mnt_root->d_lock); - - mark->m.mnt = mnt; - - /* is mark the first mark? */ - if (hlist_empty(&m->mnt_fsnotify_marks)) { - hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks); - goto out; - } - - /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) { - last = lmark; - - if ((lmark->group == group) && !allow_dups) { - ret = -EEXIST; - goto out; - } - - cmp = fsnotify_compare_groups(lmark->group, mark->group); - if (cmp < 0) - continue; - - hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); - goto out; - } - - BUG_ON(last == NULL); - /* mark should be the last entry. 
last is the current last entry */ - hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list); -out: - fsnotify_recalc_vfsmount_mask_locked(mnt); + mark->mnt = mnt; + ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups); + m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); spin_unlock(&mnt->mnt_root->d_lock); return ret; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ca060d7c4fa6..442847a02b8f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -196,24 +196,6 @@ struct fsnotify_group { #define FSNOTIFY_EVENT_PATH 1 #define FSNOTIFY_EVENT_INODE 2 -/* - * Inode specific fields in an fsnotify_mark - */ -struct fsnotify_inode_mark { - struct inode *inode; /* inode this mark is associated with */ - struct hlist_node i_list; /* list of marks by inode->i_fsnotify_marks */ - struct list_head free_i_list; /* tmp list used when freeing this mark */ -}; - -/* - * Mount point specific fields in an fsnotify_mark - */ -struct fsnotify_vfsmount_mark { - struct vfsmount *mnt; /* vfsmount this mark is associated with */ - struct hlist_node m_list; /* list of marks by inode->i_fsnotify_marks */ - struct list_head free_m_list; /* tmp list used when freeing this mark */ -}; - /* * a mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events @@ -232,9 +214,11 @@ struct fsnotify_mark { struct fsnotify_group *group; /* group this mark is for */ struct list_head g_list; /* list of marks by group->i_fsnotify_marks */ spinlock_t lock; /* protect group and inode */ + struct hlist_node obj_list; /* list of marks for inode / vfsmount */ + struct list_head free_list; /* tmp list used when freeing this mark */ union { - struct fsnotify_inode_mark i; - struct fsnotify_vfsmount_mark m; + struct inode *inode; /* inode this mark is associated with */ + struct vfsmount *mnt; /* vfsmount this mark is 
associated with */ }; __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 80f29e015570..2e0c97427b33 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -174,9 +174,9 @@ static void insert_hash(struct audit_chunk *chunk) struct fsnotify_mark *entry = &chunk->mark; struct list_head *list; - if (!entry->i.inode) + if (!entry->inode) return; - list = chunk_hash(entry->i.inode); + list = chunk_hash(entry->inode); list_add_rcu(&chunk->hash, list); } @@ -188,7 +188,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode) list_for_each_entry_rcu(p, list, hash) { /* mark.inode may have gone NULL, but who cares? */ - if (p->mark.i.inode == inode) { + if (p->mark.inode == inode) { atomic_long_inc(&p->refs); return p; } @@ -231,7 +231,7 @@ static void untag_chunk(struct node *p) new = alloc_chunk(size); spin_lock(&entry->lock); - if (chunk->dead || !entry->i.inode) { + if (chunk->dead || !entry->inode) { spin_unlock(&entry->lock); if (new) free_chunk(new); @@ -258,7 +258,7 @@ static void untag_chunk(struct node *p) goto Fallback; fsnotify_duplicate_mark(&new->mark, entry); - if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { + if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.inode, NULL, 1)) { fsnotify_put_mark(&new->mark); goto Fallback; } @@ -386,7 +386,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk_entry = &chunk->mark; spin_lock(&old_entry->lock); - if (!old_entry->i.inode) { + if (!old_entry->inode) { /* old_entry is being shot, lets just lie */ spin_unlock(&old_entry->lock); fsnotify_put_mark(old_entry); @@ -395,7 +395,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) } fsnotify_duplicate_mark(chunk_entry, old_entry); - if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { + if (fsnotify_add_mark(chunk_entry, 
chunk_entry->group, chunk_entry->inode, NULL, 1)) { spin_unlock(&old_entry->lock); fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); @@ -611,7 +611,7 @@ void audit_trim_trees(void) list_for_each_entry(node, &tree->chunks, list) { struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying else where... */ - struct inode *inode = chunk->mark.i.inode; + struct inode *inode = chunk->mark.inode; node->index |= 1U<<31; if (iterate_mounts(compare_root, inode, root_mnt)) node->index &= ~(1U<<31); -- cgit v1.2.3