From 32fcfd40715ed13f7a80cbde49d097ddae20c8e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Mar 2013 20:14:08 -0400 Subject: make vfree() safe to call from interrupt contexts A bunch of RCU callbacks want to be able to do vfree() and end up with rather kludgy schemes. Just let vfree() do the right thing - put the victim on llist and schedule actual __vunmap() via schedule_work(), so that it runs from non-interrupt context. Signed-off-by: Al Viro --- mm/vmalloc.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0f751f2068c3..ef9bdf742273 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -27,10 +27,30 @@ #include #include #include +#include #include #include #include +struct vfree_deferred { + struct llist_head list; + struct work_struct wq; +}; +static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred); + +static void __vunmap(const void *, int); + +static void free_work(struct work_struct *w) +{ + struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq); + struct llist_node *llnode = llist_del_all(&p->list); + while (llnode) { + void *p = llnode; + llnode = llist_next(llnode); + __vunmap(p, 1); + } +} + /*** Page table manipulation functions ***/ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) @@ -1184,10 +1204,14 @@ void __init vmalloc_init(void) for_each_possible_cpu(i) { struct vmap_block_queue *vbq; + struct vfree_deferred *p; vbq = &per_cpu(vmap_block_queue, i); spin_lock_init(&vbq->lock); INIT_LIST_HEAD(&vbq->free); + p = &per_cpu(vfree_deferred, i); + init_llist_head(&p->list); + INIT_WORK(&p->wq, free_work); } /* Import existing vmlist entries. */ @@ -1511,7 +1535,7 @@ static void __vunmap(const void *addr, int deallocate_pages) kfree(area); return; } - + /** * vfree - release memory allocated by vmalloc() * @addr: memory base address @@ -1520,15 +1544,25 @@ static void __vunmap(const void *addr, int deallocate_pages) * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is * NULL, no operation is performed. * - * Must not be called in interrupt context. + * Must not be called in NMI context (strictly speaking, only if we don't + * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling + * conventions for vfree() arch-depenedent would be a really bad idea) + * */ void vfree(const void *addr) { - BUG_ON(in_interrupt()); + BUG_ON(in_nmi()); kmemleak_free(addr); - __vunmap(addr, 1); + if (!addr) + return; + if (unlikely(in_interrupt())) { + struct vfree_deferred *p = &__get_cpu_var(vfree_deferred); + llist_add((struct llist_node *)addr, &p->list); + schedule_work(&p->wq); + } else + __vunmap(addr, 1); } EXPORT_SYMBOL(vfree); @@ -1545,7 +1579,8 @@ void vunmap(const void *addr) { BUG_ON(in_interrupt()); might_sleep(); - __vunmap(addr, 0); + if (addr) + __vunmap(addr, 0); } EXPORT_SYMBOL(vunmap); -- cgit v1.2.3 From 8d71db4f0890605d44815a2b2da4ca003f1bb142 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Mar 2013 21:01:03 -0400 Subject: lift sb_start_write/sb_end_write out of ->aio_write() Signed-off-by: Al Viro --- fs/aio.c | 4 ++++ fs/btrfs/file.c | 3 --- fs/cifs/file.c | 3 --- fs/compat.c | 6 ++++-- fs/fuse/file.c | 2 -- fs/ntfs/file.c | 2 -- fs/ocfs2/file.c | 3 --- fs/read_write.c | 8 ++++++-- fs/xfs/xfs_file.c | 3 --- include/linux/fs.h | 14 ++++++++++++++ mm/filemap.c | 2 -- 11 files changed, 28 insertions(+), 22 deletions(-) (limited to 'mm') diff --git a/fs/aio.c b/fs/aio.c index 3f941f2a3059..4ec28f13a92e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) if (iocb->ki_pos < 0) return -EINVAL; + if (opcode == IOCB_CMD_PWRITEV) + file_start_write(file); do { ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], iocb->ki_nr_segs - iocb->ki_cur_seg, @@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) } while (ret > 0 && iocb->ki_left > 0 && (opcode == IOCB_CMD_PWRITEV || (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); + if (opcode == IOCB_CMD_PWRITEV) + file_end_write(file); /* This means we must have transferred all that we could */ /* No need to retry anymore */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b4ea5f55b8f..254aeb72915f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, size_t count, ocount; bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); - sb_start_write(inode->i_sb); - mutex_lock(&inode->i_mutex); err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); @@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, if (sync) atomic_dec(&BTRFS_I(inode)->sync_writers); out: - sb_end_write(inode->i_sb); current->backing_dev_info = NULL; return num_written ? num_written : err; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7a0dd99e4507..2d4a231dd70b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); - sb_start_write(inode->i_sb); - /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. @@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, } up_read(&cinode->lock_sem); - sb_end_write(inode->i_sb); return rc; } diff --git a/fs/compat.c b/fs/compat.c index d487985dd0ea..daa3b771d64d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1103,10 +1103,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, fnv = file->f_op->aio_write; } - if (fnv) + if (fnv) { + file_start_write(file); ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); - else + file_end_write(file); + } else ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); out: diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b80ba95bad..d15c6f21c17f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return err; count = ocount; - sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, out: current->backing_dev_info = NULL; mutex_unlock(&inode->i_mutex); - sb_end_write(inode->i_sb); return written ? written : err; } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 5b2d4f0853ac..1da4b81e6f76 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); - sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0) ret = err; } - sb_end_write(inode->i_sb); return ret; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6474cb44004d..1c93e771e950 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (iocb->ki_left == 0) return 0; - sb_start_write(inode->i_sb); - appending = file->f_flags & O_APPEND ? 1 : 0; direct_io = file->f_flags & O_DIRECT ? 1 : 0; @@ -2423,7 +2421,6 @@ out_sems: ocfs2_iocb_clear_sem_locked(iocb); mutex_unlock(&inode->i_mutex); - sb_end_write(inode->i_sb); if (written) ret = written; diff --git a/fs/read_write.c b/fs/read_write.c index f7b5a23b804b..3e1791a2cfd6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -398,6 +398,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof struct kiocb kiocb; ssize_t ret; + file_start_write(filp); init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; kiocb.ki_left = len; @@ -413,6 +414,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; + file_end_write(filp); return ret; } @@ -758,10 +760,12 @@ static ssize_t do_readv_writev(int type, struct file *file, fnv = file->f_op->aio_write; } - if (fnv) + if (fnv) { + file_start_write(file); ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); - else + file_end_write(file); + } else ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); out: diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f03bf1a456fb..3800128d2171 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -775,8 +775,6 @@ xfs_file_aio_write( if (ocount == 0) return 0; - sb_start_write(inode->i_sb); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { ret = -EIO; goto out; @@ -800,7 +798,6 @@ xfs_file_aio_write( } out: - sb_end_write(inode->i_sb); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c28271ab9d4..578a66e6ee72 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2223,6 +2223,20 @@ static inline struct inode *file_inode(struct file *f) return f->f_inode; } +static inline void file_start_write(struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return; + __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); +} + +static inline void file_end_write(struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return; + __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. diff --git a/mm/filemap.c b/mm/filemap.c index e1979fdca805..cbde8842a374 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2528,7 +2528,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); - sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2540,7 +2539,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } - sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL(generic_file_aio_write); -- cgit v1.2.3 From 03d95eb2f2578083a3f6286262e1cb5d88a00c02 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Mar 2013 13:04:20 -0400 Subject: lift sb_start_write() out of ->write() Signed-off-by: Al Viro --- drivers/block/loop.c | 2 ++ fs/cachefiles/rdwr.c | 2 ++ fs/coda/file.c | 2 ++ fs/coredump.c | 2 ++ fs/read_write.c | 24 ++++++++++++++---------- fs/splice.c | 2 ++ kernel/acct.c | 2 ++ mm/filemap_xip.c | 3 --- 8 files changed, 26 insertions(+), 13 deletions(-) (limited to 'mm') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 747bb2af69dc..cd1e17460f03 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -230,9 +230,11 @@ static int __do_lo_send_write(struct file *file, ssize_t bw; mm_segment_t old_fs = get_fs(); + file_start_write(file); set_fs(get_ds()); bw = file->f_op->write(file, buf, len, &pos); set_fs(old_fs); + file_end_write(file); if (likely(bw == len)) return 0; printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 480992259707..317f9ee9c991 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) } data = kmap(page); + file_start_write(file); old_fs = get_fs(); set_fs(KERNEL_DS); ret = file->f_op->write( file, (const void __user *) data, len, &pos); set_fs(old_fs); kunmap(page); + file_end_write(file); if (ret != len) ret = -EIO; } diff --git a/fs/coda/file.c b/fs/coda/file.c index fa4c100bdc7d..380b798f8443 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo return -EINVAL; host_inode = file_inode(host_file); + file_start_write(host_file); mutex_lock(&coda_inode->i_mutex); ret = host_file->f_op->write(host_file, buf, count, ppos); @@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; mutex_unlock(&coda_inode->i_mutex); + file_end_write(host_file); return ret; } diff --git a/fs/coredump.c b/fs/coredump.c index c6479658d487..288e5c9f9bbe 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -629,9 +629,11 @@ void do_coredump(siginfo_t *siginfo) goto close_fail; if (displaced) put_files_struct(displaced); + file_start_write(cprm.file); retval = binfmt->core_dump(&cprm); if (retval) current->signal->group_exit_code |= 0x80; + file_end_write(cprm.file); if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); diff --git a/fs/read_write.c b/fs/read_write.c index e6dd1c2d0592..a1f4d44cbc03 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -398,7 +398,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof struct kiocb kiocb; ssize_t ret; - file_start_write(filp); init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; kiocb.ki_left = len; @@ -414,7 +413,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; - file_end_write(filp); return ret; } @@ -458,6 +456,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ ret = rw_verify_area(WRITE, file, pos, count); if (ret >= 0) { count = ret; + file_start_write(file); if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); else @@ -467,6 +466,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ add_wchar(current, ret); } inc_syscw(current); + file_end_write(file); } return ret; @@ -758,16 +758,18 @@ static ssize_t do_readv_writev(int type, struct file *file, } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + file_start_write(file); } - if (fnv) { - file_start_write(file); + if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); - file_end_write(file); - } else + else ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + if (type != READ) + file_end_write(file); + out: if (iov != iovstack) kfree(iov); @@ -936,16 +938,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + file_start_write(file); } - if (fnv) { - file_start_write(file); + if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); - file_end_write(file); - } else + else ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + if (type != READ) + file_end_write(file); + out: if (iov != iovstack) kfree(iov); diff --git a/fs/splice.c b/fs/splice.c index 29e394e49ddd..e78a749064db 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1052,7 +1052,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, loff_t tmp = sd->pos; data = buf->ops->map(pipe, buf, 0); + file_start_write(sd->u.file); ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); + file_end_write(sd->u.file); buf->ops->unmap(pipe, buf, data); return ret; diff --git a/kernel/acct.c b/kernel/acct.c index b9bd7f098ee5..85389fe2abd0 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -543,6 +543,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, * Kernel segment override to datasegment and write it * to the accounting file. */ + file_start_write(file); fs = get_fs(); set_fs(KERNEL_DS); /* @@ -554,6 +555,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, sizeof(acct_t), &file->f_pos); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; set_fs(fs); + file_end_write(file); out: revert_creds(orig_cred); } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a912da6ddfd4..28fe26b64f8a 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -404,8 +404,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t pos; ssize_t ret; - sb_start_write(inode->i_sb); - mutex_lock(&inode->i_mutex); if (!access_ok(VERIFY_READ, buf, len)) { @@ -439,7 +437,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, current->backing_dev_info = NULL; out_up: mutex_unlock(&inode->i_mutex); - sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL_GPL(xip_file_write); -- cgit v1.2.3