summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/aio.c 62
-rw-r--r-- fs/bio.c 12
-rw-r--r-- fs/block_dev.c 12
-rw-r--r-- fs/coda/sysctl.c 10
-rw-r--r-- fs/compat_ioctl.c 745
-rw-r--r-- fs/direct-io.c 10
-rw-r--r-- fs/eventpoll.c 4
-rw-r--r-- fs/fs-writeback.c 28
-rw-r--r-- fs/lockd/svc.c 26
-rw-r--r-- fs/nfs/sysctl.c 22
-rw-r--r-- fs/nfs/write.c 2
-rw-r--r-- fs/notify/inotify/inotify_user.c 14
-rw-r--r-- fs/ntfs/sysctl.c 4
-rw-r--r-- fs/ocfs2/cluster/netdebug.c 8
-rw-r--r-- fs/ocfs2/stackglue.c 15
-rw-r--r-- fs/partitions/check.c 12
-rw-r--r-- fs/partitions/efi.c 30
-rw-r--r-- fs/partitions/efi.h 8
-rw-r--r-- fs/proc/proc_sysctl.c 4
-rw-r--r-- fs/quota/dquot.c 35
-rw-r--r-- fs/read_write.c 2
-rw-r--r-- fs/reiserfs/Makefile 2
-rw-r--r-- fs/reiserfs/bitmap.c 4
-rw-r--r-- fs/reiserfs/dir.c 10
-rw-r--r-- fs/reiserfs/do_balan.c 17
-rw-r--r-- fs/reiserfs/file.c 2
-rw-r--r-- fs/reiserfs/fix_node.c 19
-rw-r--r-- fs/reiserfs/inode.c 97
-rw-r--r-- fs/reiserfs/ioctl.c 77
-rw-r--r-- fs/reiserfs/journal.c 130
-rw-r--r-- fs/reiserfs/lock.c 88
-rw-r--r-- fs/reiserfs/namei.c 20
-rw-r--r-- fs/reiserfs/prints.c 4
-rw-r--r-- fs/reiserfs/resize.c 2
-rw-r--r-- fs/reiserfs/stree.c 53
-rw-r--r-- fs/reiserfs/super.c 52
-rw-r--r-- fs/reiserfs/xattr.c 6
-rw-r--r-- fs/splice.c 24
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 9
-rw-r--r-- fs/xfs/linux-2.6/xfs_sysctl.c 62
40 files changed, 627 insertions, 1116 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c9340573..c30dfc006108 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/backing-dev.h>
#include <linux/uio.h>
#define DEBUG 0
@@ -32,6 +33,9 @@
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
static DEFINE_SPINLOCK(fput_lock);
static LIST_HEAD(fput_head);
+#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+ struct hlist_node list;
+ struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
static void aio_kick_handler(struct work_struct *);
static void aio_queue_work(struct kioctx *);
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
aio_wq = create_workqueue("aio");
+ abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+ BUG_ON(!abe_pool);
pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
@@ -1531,8 +1545,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
return 1;
}
+static void aio_batch_add(struct address_space *mapping,
+ struct hlist_head *batch_hash)
+{
+ struct aio_batch_entry *abe;
+ struct hlist_node *pos;
+ unsigned bucket;
+
+ bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+ hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+ if (abe->mapping == mapping)
+ return;
+ }
+
+ abe = mempool_alloc(abe_pool, GFP_KERNEL);
+ BUG_ON(!igrab(mapping->host));
+ abe->mapping = mapping;
+ hlist_add_head(&abe->list, &batch_hash[bucket]);
+ return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
+{
+ struct aio_batch_entry *abe;
+ struct hlist_node *pos, *n;
+ int i;
+
+ for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+ hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+ blk_run_address_space(abe->mapping);
+ iput(abe->mapping->host);
+ hlist_del(&abe->list);
+ mempool_free(abe, abe_pool);
+ }
+ }
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb)
+ struct iocb *iocb, struct hlist_head *batch_hash)
{
struct kiocb *req;
struct file *file;
@@ -1608,6 +1658,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
;
}
spin_unlock_irq(&ctx->ctx_lock);
+ if (req->ki_opcode == IOCB_CMD_PREAD ||
+ req->ki_opcode == IOCB_CMD_PREADV ||
+ req->ki_opcode == IOCB_CMD_PWRITE ||
+ req->ki_opcode == IOCB_CMD_PWRITEV)
+ aio_batch_add(file->f_mapping, batch_hash);
+
aio_put_req(req); /* drop extra ref to req */
return 0;
@@ -1635,6 +1691,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
struct kioctx *ctx;
long ret = 0;
int i;
+ struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
if (unlikely(nr < 0))
return -EINVAL;
@@ -1666,10 +1723,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp);
+ ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
if (ret)
break;
}
+ aio_batch_free(batch_hash);
put_ioctx(ctx);
return i ? i : ret;
diff --git a/fs/bio.c b/fs/bio.c
index 12da5db8682c..e23a63f4f7de 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio)
}
}
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+void bio_flush_dcache_pages(struct bio *bi)
+{
+ int i;
+ struct bio_vec *bvec;
+
+ bio_for_each_segment(bvec, bi, i)
+ flush_dcache_page(bvec->bv_page);
+}
+EXPORT_SYMBOL(bio_flush_dcache_pages);
+#endif
+
/**
* bio_endio - end I/O on a bio
* @bio: bio
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8bed0557d88c..73d6a735b8f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -405,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
- return sync_blockdev(I_BDEV(filp->f_mapping->host));
+ struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+ int error;
+
+ error = sync_blockdev(bdev);
+ if (error)
+ return error;
+
+ error = blkdev_issue_flush(bdev, NULL);
+ if (error == -EOPNOTSUPP)
+ error = 0;
+ return error;
}
/*
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 43c96ce29614..c6405ce3c50e 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -17,28 +17,25 @@ static struct ctl_table_header *fs_table_header;
static ctl_table coda_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "timeout",
.data = &coda_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hard",
.data = &coda_hard,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "fake_statfs",
.data = &coda_fake_statfs,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{}
};
@@ -46,7 +43,6 @@ static ctl_table coda_table[] = {
#ifdef CONFIG_SYSCTL
static ctl_table fs_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "coda",
.mode = 0555,
.child = coda_table
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d84e7058c298..229e72218165 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -246,428 +246,6 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned
return err;
}
-#ifdef CONFIG_NET
-static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct compat_timeval __user *up = compat_ptr(arg);
- struct timeval ktv;
- mm_segment_t old_fs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&ktv);
- set_fs(old_fs);
- if(!err) {
- err = put_user(ktv.tv_sec, &up->tv_sec);
- err |= __put_user(ktv.tv_usec, &up->tv_usec);
- }
- return err;
-}
-
-static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct compat_timespec __user *up = compat_ptr(arg);
- struct timespec kts;
- mm_segment_t old_fs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&kts);
- set_fs(old_fs);
- if (!err) {
- err = put_user(kts.tv_sec, &up->tv_sec);
- err |= __put_user(kts.tv_nsec, &up->tv_nsec);
- }
- return err;
-}
-
-struct ifmap32 {
- compat_ulong_t mem_start;
- compat_ulong_t mem_end;
- unsigned short base_addr;
- unsigned char irq;
- unsigned char dma;
- unsigned char port;
-};
-
-struct ifreq32 {
-#define IFHWADDRLEN 6
-#define IFNAMSIZ 16
- union {
- char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
- } ifr_ifrn;
- union {
- struct sockaddr ifru_addr;
- struct sockaddr ifru_dstaddr;
- struct sockaddr ifru_broadaddr;
- struct sockaddr ifru_netmask;
- struct sockaddr ifru_hwaddr;
- short ifru_flags;
- compat_int_t ifru_ivalue;
- compat_int_t ifru_mtu;
- struct ifmap32 ifru_map;
- char ifru_slave[IFNAMSIZ]; /* Just fits the size */
- char ifru_newname[IFNAMSIZ];
- compat_caddr_t ifru_data;
- /* XXXX? ifru_settings should be here */
- } ifr_ifru;
-};
-
-struct ifconf32 {
- compat_int_t ifc_len; /* size of buffer */
- compat_caddr_t ifcbuf;
-};
-
-static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(struct ifreq));
- if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
- return -EFAULT;
-
- err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
- if (err)
- return err;
-
- if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
- return -EFAULT;
-
- return 0;
-}
-
-static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifconf32 ifc32;
- struct ifconf ifc;
- struct ifconf __user *uifc;
- struct ifreq32 __user *ifr32;
- struct ifreq __user *ifr;
- unsigned int i, j;
- int err;
-
- if (copy_from_user(&ifc32, compat_ptr(arg), sizeof(struct ifconf32)))
- return -EFAULT;
-
- if (ifc32.ifcbuf == 0) {
- ifc32.ifc_len = 0;
- ifc.ifc_len = 0;
- ifc.ifc_req = NULL;
- uifc = compat_alloc_user_space(sizeof(struct ifconf));
- } else {
- size_t len =((ifc32.ifc_len / sizeof (struct ifreq32)) + 1) *
- sizeof (struct ifreq);
- uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
- ifc.ifc_len = len;
- ifr = ifc.ifc_req = (void __user *)(uifc + 1);
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) {
- if (copy_in_user(ifr, ifr32, sizeof(struct ifreq32)))
- return -EFAULT;
- ifr++;
- ifr32++;
- }
- }
- if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- err = sys_ioctl (fd, SIOCGIFCONF, (unsigned long)uifc);
- if (err)
- return err;
-
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- ifr = ifc.ifc_req;
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0, j = 0;
- i + sizeof (struct ifreq32) <= ifc32.ifc_len && j < ifc.ifc_len;
- i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) {
- if (copy_in_user(ifr32, ifr, sizeof (struct ifreq32)))
- return -EFAULT;
- ifr32++;
- ifr++;
- }
-
- if (ifc32.ifcbuf == 0) {
- /* Translate from 64-bit structure multiple to
- * a 32-bit one.
- */
- i = ifc.ifc_len;
- i = ((i / sizeof(struct ifreq)) * sizeof(struct ifreq32));
- ifc32.ifc_len = i;
- } else {
- ifc32.ifc_len = i;
- }
- if (copy_to_user(compat_ptr(arg), &ifc32, sizeof(struct ifconf32)))
- return -EFAULT;
-
- return 0;
-}
-
-static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifreq __user *ifr;
- struct ifreq32 __user *ifr32;
- u32 data;
- void __user *datap;
-
- ifr = compat_alloc_user_space(sizeof(*ifr));
- ifr32 = compat_ptr(arg);
-
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
- return -EFAULT;
-
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
- return -EFAULT;
-
- datap = compat_ptr(data);
- if (put_user(datap, &ifr->ifr_ifru.ifru_data))
- return -EFAULT;
-
- return sys_ioctl(fd, cmd, (unsigned long) ifr);
-}
-
-static int bond_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifreq kifr;
- struct ifreq __user *uifr;
- struct ifreq32 __user *ifr32 = compat_ptr(arg);
- mm_segment_t old_fs;
- int err;
- u32 data;
- void __user *datap;
-
- switch (cmd) {
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- if (copy_from_user(&kifr, ifr32, sizeof(struct ifreq32)))
- return -EFAULT;
-
- old_fs = get_fs();
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, cmd, (unsigned long)&kifr);
- set_fs (old_fs);
-
- return err;
- case SIOCBONDSLAVEINFOQUERY:
- case SIOCBONDINFOQUERY:
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
- return -EFAULT;
-
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
- return -EFAULT;
-
- datap = compat_ptr(data);
- if (put_user(datap, &uifr->ifr_ifru.ifru_data))
- return -EFAULT;
-
- return sys_ioctl (fd, cmd, (unsigned long)uifr);
- default:
- return -EINVAL;
- };
-}
-
-static int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifreq __user *u_ifreq64;
- struct ifreq32 __user *u_ifreq32 = compat_ptr(arg);
- char tmp_buf[IFNAMSIZ];
- void __user *data64;
- u32 data32;
-
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
- IFNAMSIZ))
- return -EFAULT;
- if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
- return -EFAULT;
- data64 = compat_ptr(data32);
-
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
-
- /* Don't check these user accesses, just let that get trapped
- * in the ioctl handler instead.
- */
- if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
- IFNAMSIZ))
- return -EFAULT;
- if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
- return -EFAULT;
-
- return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64);
-}
-
-static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct ifreq ifr;
- struct ifreq32 __user *uifr32;
- struct ifmap32 __user *uifmap32;
- mm_segment_t old_fs;
- int err;
-
- uifr32 = compat_ptr(arg);
- uifmap32 = &uifr32->ifr_ifru.ifru_map;
- switch (cmd) {
- case SIOCSIFMAP:
- err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- return -EFAULT;
- break;
- case SIOCSHWTSTAMP:
- if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
- ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
- break;
- default:
- if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
- break;
- }
- old_fs = get_fs();
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
- set_fs (old_fs);
- if (!err) {
- switch (cmd) {
- /* TUNSETIFF is defined as _IOW, it should be _IORW
- * as the data is copied back to user space, but that
- * cannot be fixed without breaking all existing apps.
- */
- case TUNSETIFF:
- case TUNGETIFF:
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFTXQLEN:
- if (copy_to_user(uifr32, &ifr, sizeof(*uifr32)))
- return -EFAULT;
- break;
- case SIOCGIFMAP:
- err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- err = -EFAULT;
- break;
- }
- }
- return err;
-}
-
-struct rtentry32 {
- u32 rt_pad1;
- struct sockaddr rt_dst; /* target address */
- struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
- struct sockaddr rt_genmask; /* target network mask (IP) */
- unsigned short rt_flags;
- short rt_pad2;
- u32 rt_pad3;
- unsigned char rt_tos;
- unsigned char rt_class;
- short rt_pad4;
- short rt_metric; /* +1 for binary compatibility! */
- /* char * */ u32 rt_dev; /* forcing the device at add */
- u32 rt_mtu; /* per route MTU/Window */
- u32 rt_window; /* Window clamping */
- unsigned short rt_irtt; /* Initial RTT */
-
-};
-
-struct in6_rtmsg32 {
- struct in6_addr rtmsg_dst;
- struct in6_addr rtmsg_src;
- struct in6_addr rtmsg_gateway;
- u32 rtmsg_type;
- u16 rtmsg_dst_len;
- u16 rtmsg_src_len;
- u32 rtmsg_metric;
- u32 rtmsg_info;
- u32 rtmsg_flags;
- s32 rtmsg_ifindex;
-};
-
-static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- int ret;
- void *r = NULL;
- struct in6_rtmsg r6;
- struct rtentry r4;
- char devname[16];
- u32 rtdev;
- mm_segment_t old_fs = get_fs();
-
- struct socket *mysock = sockfd_lookup(fd, &ret);
-
- if (mysock && mysock->sk && mysock->sk->sk_family == AF_INET6) { /* ipv6 */
- struct in6_rtmsg32 __user *ur6 = compat_ptr(arg);
- ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
- 3 * sizeof(struct in6_addr));
- ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
- ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
- ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
- ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
- ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
- ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
- ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
-
- r = (void *) &r6;
- } else { /* ipv4 */
- struct rtentry32 __user *ur4 = compat_ptr(arg);
- ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
- 3 * sizeof(struct sockaddr));
- ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
- ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
- ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
- ret |= __get_user (r4.rt_window, &(ur4->rt_window));
- ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
- ret |= __get_user (rtdev, &(ur4->rt_dev));
- if (rtdev) {
- ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
- r4.rt_dev = devname; devname[15] = 0;
- } else
- r4.rt_dev = NULL;
-
- r = (void *) &r4;
- }
-
- if (ret) {
- ret = -EFAULT;
- goto out;
- }
-
- set_fs (KERNEL_DS);
- ret = sys_ioctl (fd, cmd, (unsigned long) r);
- set_fs (old_fs);
-
-out:
- if (mysock)
- sockfd_put(mysock);
-
- return ret;
-}
-#endif
-
#ifdef CONFIG_BLOCK
typedef struct sg_io_hdr32 {
compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
@@ -1212,170 +790,6 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long a
return err;
}
-struct atmif_sioc32 {
- compat_int_t number;
- compat_int_t length;
- compat_caddr_t arg;
-};
-
-struct atm_iobuf32 {
- compat_int_t length;
- compat_caddr_t buffer;
-};
-
-#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct atmif_sioc32)
-#define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct atm_iobuf32)
-#define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct atmif_sioc32)
-#define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct atmif_sioc32)
-#define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct atmif_sioc32)
-#define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct atmif_sioc32)
-#define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct atmif_sioc32)
-#define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct atmif_sioc32)
-#define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct atmif_sioc32)
-#define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct atmif_sioc32)
-#define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct atmif_sioc32)
-#define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct atmif_sioc32)
-#define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32)
-#define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32)
-#define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32)
-#define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32)
-#define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32)
-
-static struct {
- unsigned int cmd32;
- unsigned int cmd;
-} atm_ioctl_map[] = {
- { ATM_GETLINKRATE32, ATM_GETLINKRATE },
- { ATM_GETNAMES32, ATM_GETNAMES },
- { ATM_GETTYPE32, ATM_GETTYPE },
- { ATM_GETESI32, ATM_GETESI },
- { ATM_GETADDR32, ATM_GETADDR },
- { ATM_RSTADDR32, ATM_RSTADDR },
- { ATM_ADDADDR32, ATM_ADDADDR },
- { ATM_DELADDR32, ATM_DELADDR },
- { ATM_GETCIRANGE32, ATM_GETCIRANGE },
- { ATM_SETCIRANGE32, ATM_SETCIRANGE },
- { ATM_SETESI32, ATM_SETESI },
- { ATM_SETESIF32, ATM_SETESIF },
- { ATM_GETSTAT32, ATM_GETSTAT },
- { ATM_GETSTATZ32, ATM_GETSTATZ },
- { ATM_GETLOOP32, ATM_GETLOOP },
- { ATM_SETLOOP32, ATM_SETLOOP },
- { ATM_QUERYLOOP32, ATM_QUERYLOOP }
-};
-
-#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map)
-
-static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct atm_iobuf __user *iobuf;
- struct atm_iobuf32 __user *iobuf32;
- u32 data;
- void __user *datap;
- int len, err;
-
- iobuf = compat_alloc_user_space(sizeof(*iobuf));
- iobuf32 = compat_ptr(arg);
-
- if (get_user(len, &iobuf32->length) ||
- get_user(data, &iobuf32->buffer))
- return -EFAULT;
- datap = compat_ptr(data);
- if (put_user(len, &iobuf->length) ||
- put_user(datap, &iobuf->buffer))
- return -EFAULT;
-
- err = sys_ioctl(fd, cmd, (unsigned long)iobuf);
-
- if (!err) {
- if (copy_in_user(&iobuf32->length, &iobuf->length,
- sizeof(int)))
- err = -EFAULT;
- }
-
- return err;
-}
-
-static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- struct atmif_sioc __user *sioc;
- struct atmif_sioc32 __user *sioc32;
- u32 data;
- void __user *datap;
- int err;
-
- sioc = compat_alloc_user_space(sizeof(*sioc));
- sioc32 = compat_ptr(arg);
-
- if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
- get_user(data, &sioc32->arg))
- return -EFAULT;
- datap = compat_ptr(data);
- if (put_user(datap, &sioc->arg))
- return -EFAULT;
-
- err = sys_ioctl(fd, cmd, (unsigned long) sioc);
-
- if (!err) {
- if (copy_in_user(&sioc32->length, &sioc->length,
- sizeof(int)))
- err = -EFAULT;
- }
- return err;
-}
-
-static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
-{
- int i;
- unsigned int cmd = 0;
-
- switch (cmd32) {
- case SONET_GETSTAT:
- case SONET_GETSTATZ:
- case SONET_GETDIAG:
- case SONET_SETDIAG:
- case SONET_CLRDIAG:
- case SONET_SETFRAMING:
- case SONET_GETFRAMING:
- case SONET_GETFRSENSE:
- return do_atmif_sioc(fd, cmd32, arg);
- }
-
- for (i = 0; i < NR_ATM_IOCTL; i++) {
- if (cmd32 == atm_ioctl_map[i].cmd32) {
- cmd = atm_ioctl_map[i].cmd;
- break;
- }
- }
- if (i == NR_ATM_IOCTL)
- return -EINVAL;
-
- switch (cmd) {
- case ATM_GETNAMES:
- return do_atm_iobuf(fd, cmd, arg);
-
- case ATM_GETLINKRATE:
- case ATM_GETTYPE:
- case ATM_GETESI:
- case ATM_GETADDR:
- case ATM_RSTADDR:
- case ATM_ADDADDR:
- case ATM_DELADDR:
- case ATM_GETCIRANGE:
- case ATM_SETCIRANGE:
- case ATM_SETESI:
- case ATM_SETESIF:
- case ATM_GETSTAT:
- case ATM_GETSTATZ:
- case ATM_GETLOOP:
- case ATM_SETLOOP:
- case ATM_QUERYLOOP:
- return do_atmif_sioc(fd, cmd, arg);
- }
-
- return -EINVAL;
-}
-
static __used int
ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
{
@@ -1718,21 +1132,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
return sys_ioctl(fd, cmd, (unsigned long)tdata);
}
-/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
- * for some operations; this forces use of the newer bridge-utils that
- * use compatible ioctls
- */
-static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- u32 tmp;
-
- if (get_user(tmp, (u32 __user *) arg))
- return -EFAULT;
- if (tmp == BRCTL_GET_VERSION)
- return BRCTL_VERSION + 1;
- return -EINVAL;
-}
-
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
@@ -1979,18 +1378,6 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
#endif
-/* Big T */
-COMPATIBLE_IOCTL(TUNSETNOCSUM)
-COMPATIBLE_IOCTL(TUNSETDEBUG)
-COMPATIBLE_IOCTL(TUNSETPERSIST)
-COMPATIBLE_IOCTL(TUNSETOWNER)
-COMPATIBLE_IOCTL(TUNSETLINK)
-COMPATIBLE_IOCTL(TUNSETGROUP)
-COMPATIBLE_IOCTL(TUNGETFEATURES)
-COMPATIBLE_IOCTL(TUNSETOFFLOAD)
-COMPATIBLE_IOCTL(TUNSETTXFILTER)
-COMPATIBLE_IOCTL(TUNGETSNDBUF)
-COMPATIBLE_IOCTL(TUNSETSNDBUF)
/* Big V */
COMPATIBLE_IOCTL(VT_SETMODE)
COMPATIBLE_IOCTL(VT_GETMODE)
@@ -2032,30 +1419,6 @@ COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
COMPATIBLE_IOCTL(MTIOCTOP)
/* Socket level stuff */
COMPATIBLE_IOCTL(FIOQSIZE)
-COMPATIBLE_IOCTL(FIOSETOWN)
-COMPATIBLE_IOCTL(SIOCSPGRP)
-COMPATIBLE_IOCTL(FIOGETOWN)
-COMPATIBLE_IOCTL(SIOCGPGRP)
-COMPATIBLE_IOCTL(SIOCATMARK)
-COMPATIBLE_IOCTL(SIOCSIFLINK)
-COMPATIBLE_IOCTL(SIOCSIFENCAP)
-COMPATIBLE_IOCTL(SIOCGIFENCAP)
-COMPATIBLE_IOCTL(SIOCSIFNAME)
-COMPATIBLE_IOCTL(SIOCSARP)
-COMPATIBLE_IOCTL(SIOCGARP)
-COMPATIBLE_IOCTL(SIOCDARP)
-COMPATIBLE_IOCTL(SIOCSRARP)
-COMPATIBLE_IOCTL(SIOCGRARP)
-COMPATIBLE_IOCTL(SIOCDRARP)
-COMPATIBLE_IOCTL(SIOCADDDLCI)
-COMPATIBLE_IOCTL(SIOCDELDLCI)
-COMPATIBLE_IOCTL(SIOCGMIIPHY)
-COMPATIBLE_IOCTL(SIOCGMIIREG)
-COMPATIBLE_IOCTL(SIOCSMIIREG)
-COMPATIBLE_IOCTL(SIOCGIFVLAN)
-COMPATIBLE_IOCTL(SIOCSIFVLAN)
-COMPATIBLE_IOCTL(SIOCBRADDBR)
-COMPATIBLE_IOCTL(SIOCBRDELBR)
#ifdef CONFIG_BLOCK
/* SG stuff */
COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
@@ -2311,22 +1674,6 @@ COMPATIBLE_IOCTL(RAW_SETBIND)
COMPATIBLE_IOCTL(RAW_GETBIND)
/* SMB ioctls which do not need any translations */
COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
-/* Little a */
-COMPATIBLE_IOCTL(ATMSIGD_CTRL)
-COMPATIBLE_IOCTL(ATMARPD_CTRL)
-COMPATIBLE_IOCTL(ATMLEC_CTRL)
-COMPATIBLE_IOCTL(ATMLEC_MCAST)
-COMPATIBLE_IOCTL(ATMLEC_DATA)
-COMPATIBLE_IOCTL(ATM_SETSC)
-COMPATIBLE_IOCTL(SIOCSIFATMTCP)
-COMPATIBLE_IOCTL(SIOCMKCLIP)
-COMPATIBLE_IOCTL(ATMARP_MKIP)
-COMPATIBLE_IOCTL(ATMARP_SETENTRY)
-COMPATIBLE_IOCTL(ATMARP_ENCAP)
-COMPATIBLE_IOCTL(ATMTCP_CREATE)
-COMPATIBLE_IOCTL(ATMTCP_REMOVE)
-COMPATIBLE_IOCTL(ATMMPC_CTRL)
-COMPATIBLE_IOCTL(ATMMPC_DATA)
/* Watchdog */
COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -2532,63 +1879,6 @@ COMPATIBLE_IOCTL(JSIOCGBUTTONS)
COMPATIBLE_IOCTL(JSIOCGNAME(0))
/* now things that need handlers */
-#ifdef CONFIG_NET
-HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
-HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
-HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFMETRIC, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFMETRIC, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFMTU, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFMTU, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFMEM, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFMEM, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFHWADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFHWADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCADDMULTI, dev_ifsioc)
-HANDLE_IOCTL(SIOCDELMULTI, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFINDEX, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
-HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc)
-
-/* ioctls used by appletalk ddp.c */
-HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCDIFADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSARP, dev_ifsioc)
-HANDLE_IOCTL(SIOCDARP, dev_ifsioc)
-
-HANDLE_IOCTL(SIOCGIFBRDADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFBRDADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFDSTADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFDSTADDR, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFNETMASK, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFNETMASK, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFPFLAGS, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc)
-HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc)
-HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc)
-HANDLE_IOCTL(TUNSETIFF, dev_ifsioc)
-HANDLE_IOCTL(TUNGETIFF, dev_ifsioc)
-HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl)
-HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl)
-HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl)
-HANDLE_IOCTL(SIOCBONDSETHWADDR, bond_ioctl)
-HANDLE_IOCTL(SIOCBONDSLAVEINFOQUERY, bond_ioctl)
-HANDLE_IOCTL(SIOCBONDINFOQUERY, bond_ioctl)
-HANDLE_IOCTL(SIOCBONDCHANGEACTIVE, bond_ioctl)
-HANDLE_IOCTL(SIOCADDRT, routing_ioctl)
-HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
-HANDLE_IOCTL(SIOCBRADDIF, dev_ifsioc)
-HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
-/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
-HANDLE_IOCTL(SIOCRTMSG, ret_einval)
-HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
-HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
-#endif
#ifdef CONFIG_BLOCK
HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
@@ -2613,31 +1903,6 @@ HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
/* One SMB ioctl needs translations. */
#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
-HANDLE_IOCTL(ATM_GETLINKRATE32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETNAMES32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETTYPE32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETESI32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETADDR32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_RSTADDR32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_ADDADDR32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_DELADDR32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETCIRANGE32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_SETCIRANGE32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_SETESI32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_SETESIF32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETSTAT32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETSTATZ32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_GETLOOP32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_SETLOOP32, do_atm_ioctl)
-HANDLE_IOCTL(ATM_QUERYLOOP32, do_atm_ioctl)
-HANDLE_IOCTL(SONET_GETSTAT, do_atm_ioctl)
-HANDLE_IOCTL(SONET_GETSTATZ, do_atm_ioctl)
-HANDLE_IOCTL(SONET_GETDIAG, do_atm_ioctl)
-HANDLE_IOCTL(SONET_SETDIAG, do_atm_ioctl)
-HANDLE_IOCTL(SONET_CLRDIAG, do_atm_ioctl)
-HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
-HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
-HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
/* block stuff */
#ifdef CONFIG_BLOCK
/* loop */
@@ -2672,11 +1937,7 @@ COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
HANDLE_IOCTL(I2C_FUNCS, w_long)
HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
-/* bridge */
-HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
-HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
/* Not implemented in the native kernel */
-IGNORE_IOCTL(SIOCGIFCOUNT)
HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
@@ -2831,12 +2092,6 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
goto found_handler;
}
-#ifdef CONFIG_NET
- if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) &&
- cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
- error = siocdevprivate_ioctl(fd, cmd, arg);
- } else
-#endif
{
static int count;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87dc01a..b912270942fa 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
if (dio->bio)
dio_bio_submit(dio);
- /* All IO is now issued, send it on its way */
- blk_run_address_space(inode->i_mapping);
-
/*
* It is possible that, we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on.
@@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
((rw & READ) || (dio->result == dio->size)))
ret = -EIOCBQUEUED;
- if (ret != -EIOCBQUEUED)
+ if (ret != -EIOCBQUEUED) {
+ /* All IO is now issued, send it on its way */
+ blk_run_address_space(inode->i_mapping);
dio_await_completion(dio);
+ }
/*
* Sync will always be dropping the final ref and completing the
@@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
int acquire_i_mutex = 0;
if (rw & WRITE)
- rw = WRITE_ODIRECT;
+ rw = WRITE_ODIRECT_PLUG;
if (bdev)
bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 085c5c063420..366c503f9657 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -251,10 +251,10 @@ ctl_table epoll_table[] = {
.data = &max_user_watches,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
- { .ctl_name = 0 }
+ { }
};
#endif /* CONFIG_SYSCTL */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9d5360c4c2af..49bc1b8e8f19 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -614,7 +614,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct writeback_control *wbc)
{
struct super_block *sb = wbc->sb, *pin_sb = NULL;
- const int is_blkdev_sb = sb_is_blkdev_sb(sb);
const unsigned long start = jiffies; /* livelock avoidance */
spin_lock(&inode_lock);
@@ -635,36 +634,11 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
continue;
}
- if (!bdi_cap_writeback_dirty(wb->bdi)) {
- redirty_tail(inode);
- if (is_blkdev_sb) {
- /*
- * Dirty memory-backed blockdev: the ramdisk
- * driver does this. Skip just this inode
- */
- continue;
- }
- /*
- * Dirty memory-backed inode against a filesystem other
- * than the kernel-internal bdev filesystem. Skip the
- * entire superblock.
- */
- break;
- }
-
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
}
- if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
- wbc->encountered_congestion = 1;
- if (!is_blkdev_sb)
- break; /* Skip a congested fs */
- requeue_io(inode);
- continue; /* Skip a congested blockdev */
- }
-
/*
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
@@ -756,6 +730,7 @@ static long wb_writeback(struct bdi_writeback *wb,
.sync_mode = args->sync_mode,
.older_than_this = NULL,
.for_kupdate = args->for_kupdate,
+ .for_background = args->for_background,
.range_cyclic = args->range_cyclic,
};
unsigned long oldest_jif;
@@ -787,7 +762,6 @@ static long wb_writeback(struct bdi_writeback *wb,
break;
wbc.more_io = 0;
- wbc.encountered_congestion = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
writeback_inodes_wb(wb, &wbc);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1a54ae14a192..e50cfa3d9654 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -371,82 +371,74 @@ EXPORT_SYMBOL_GPL(lockd_down);
static ctl_table nlm_sysctls[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nlm_grace_period",
.data = &nlm_grace_period,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = (unsigned long *) &nlm_grace_period_min,
.extra2 = (unsigned long *) &nlm_grace_period_max,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nlm_timeout",
.data = &nlm_timeout,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = (unsigned long *) &nlm_timeout_min,
.extra2 = (unsigned long *) &nlm_timeout_max,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nlm_udpport",
.data = &nlm_udpport,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (int *) &nlm_port_min,
.extra2 = (int *) &nlm_port_max,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nlm_tcpport",
.data = &nlm_tcpport,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (int *) &nlm_port_min,
.extra2 = (int *) &nlm_port_max,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nsm_use_hostnames",
.data = &nsm_use_hostnames,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nsm_local_state",
.data = &nsm_local_state,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table nlm_sysctl_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nfs",
.mode = 0555,
.child = nlm_sysctls,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table nlm_sysctl_root[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = nlm_sysctl_dir,
},
- { .ctl_name = 0 }
+ { }
};
#endif /* CONFIG_SYSCTL */
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index b62481dabae9..70e1fbbaaeab 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -22,63 +22,55 @@ static struct ctl_table_header *nfs_callback_sysctl_table;
static ctl_table nfs_cb_sysctls[] = {
#ifdef CONFIG_NFS_V4
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (int *)&nfs_set_port_min,
.extra2 = (int *)&nfs_set_port_max,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "idmap_cache_timeout",
.data = &nfs_idmap_cache_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
.maxlen = sizeof(nfs_mountpoint_expiry_timeout),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nfs_congestion_kb",
.data = &nfs_congestion_kb,
.maxlen = sizeof(nfs_congestion_kb),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table nfs_cb_sysctl_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nfs",
.mode = 0555,
.child = nfs_cb_sysctls,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table nfs_cb_sysctl_root[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = nfs_cb_sysctl_dir,
},
- { .ctl_name = 0 }
+ { }
};
int nfs_register_sysctl(void)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53eb26c16b50..c84b5cc1a943 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc)
{
if (wbc->for_reclaim)
return FLUSH_HIGHPRI | FLUSH_STABLE;
- if (wbc->for_kupdate)
+ if (wbc->for_kupdate || wbc->for_background)
return FLUSH_LOWPRI;
return 0;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index dcd2040d330c..1d1d1a2765dd 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -69,36 +69,30 @@ static int zero;
ctl_table inotify_table[] = {
{
- .ctl_name = INOTIFY_MAX_USER_INSTANCES,
.procname = "max_user_instances",
.data = &inotify_max_user_instances,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{
- .ctl_name = INOTIFY_MAX_USER_WATCHES,
.procname = "max_user_watches",
.data = &inotify_max_user_watches,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{
- .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
.procname = "max_queued_events",
.data = &inotify_max_queued_events,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero
},
- { .ctl_name = 0 }
+ { }
};
#endif /* CONFIG_SYSCTL */
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 9ef85e628fe1..79a89184cb5e 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -36,12 +36,11 @@
/* Definition of the ntfs sysctl. */
static ctl_table ntfs_sysctls[] = {
{
- .ctl_name = CTL_UNNUMBERED, /* Binary and text IDs. */
.procname = "ntfs-debug",
.data = &debug_msgs, /* Data pointer and size. */
.maxlen = sizeof(debug_msgs),
.mode = 0644, /* Mode, proc handler. */
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{}
};
@@ -49,7 +48,6 @@ static ctl_table ntfs_sysctls[] = {
/* Define the parent directory /proc/sys/fs. */
static ctl_table sysctls_root[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = ntfs_sysctls
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index da794bc07a6c..a3f150e52b02 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v)
if (sc->sc_sock) {
inet = inet_sk(sc->sc_sock->sk);
/* the stack's structs aren't sparse endian clean */
- saddr = (__force __be32)inet->saddr;
- daddr = (__force __be32)inet->daddr;
- sport = (__force __be16)inet->sport;
- dport = (__force __be16)inet->dport;
+ saddr = (__force __be32)inet->inet_saddr;
+ daddr = (__force __be32)inet->inet_daddr;
+ sport = (__force __be16)inet->inet_sport;
+ dport = (__force __be16)inet->inet_dport;
}
/* XXX sigh, inet-> doesn't have sparse annotation so any
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 3f2f1c45b7b6..f3df0baa9a48 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -620,51 +620,46 @@ error:
static ctl_table ocfs2_nm_table[] = {
{
- .ctl_name = 1,
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
.maxlen = OCFS2_MAX_HB_CTL_PATH,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table ocfs2_mod_table[] = {
{
- .ctl_name = FS_OCFS2_NM,
.procname = "nm",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_nm_table
},
- { .ctl_name = 0}
+ { }
};
static ctl_table ocfs2_kern_table[] = {
{
- .ctl_name = FS_OCFS2,
.procname = "ocfs2",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_mod_table
},
- { .ctl_name = 0}
+ { }
};
static ctl_table ocfs2_root_table[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_kern_table
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *ocfs2_table_header = NULL;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e10cbad..64bc8998ac9a 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev,
return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}
+ssize_t part_discard_alignment_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ return sprintf(buf, "%u\n", p->discard_alignment);
+}
+
ssize_t part_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
+ NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = {
&dev_attr_start.attr,
&dev_attr_size.attr,
&dev_attr_alignment_offset.attr,
+ &dev_attr_discard_alignment.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->start_sect = start;
p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
+ p->discard_alignment = queue_sector_discard_alignment(disk->queue,
+ start);
p->nr_sects = len;
p->partno = partno;
p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 038a6022152f..49cfd5f54238 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -1,7 +1,9 @@
/************************************************************
* EFI GUID Partition Table handling
- * Per Intel EFI Specification v1.02
- * http://developer.intel.com/technology/efi/efi.htm
+ *
+ * http://www.uefi.org/specs/
+ * http://www.intel.com/technology/efi/
+ *
* efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
* Copyright 2000,2001,2002,2004 Dell Inc.
*
@@ -92,6 +94,7 @@
*
************************************************************/
#include <linux/crc32.h>
+#include <linux/math64.h>
#include "check.h"
#include "efi.h"
@@ -141,7 +144,8 @@ last_lba(struct block_device *bdev)
{
if (!bdev || !bdev->bd_inode)
return 0;
- return (bdev->bd_inode->i_size >> 9) - 1ULL;
+ return div_u64(bdev->bd_inode->i_size,
+ bdev_logical_block_size(bdev)) - 1ULL;
}
static inline int
@@ -188,6 +192,7 @@ static size_t
read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
{
size_t totalreadcount = 0;
+ sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
if (!bdev || !buffer || lba > last_lba(bdev))
return 0;
@@ -195,7 +200,7 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
while (count) {
int copied = 512;
Sector sect;
- unsigned char *data = read_dev_sector(bdev, lba++, &sect);
+ unsigned char *data = read_dev_sector(bdev, n++, &sect);
if (!data)
break;
if (copied > count)
@@ -257,15 +262,16 @@ static gpt_header *
alloc_read_gpt_header(struct block_device *bdev, u64 lba)
{
gpt_header *gpt;
+ unsigned ssz = bdev_logical_block_size(bdev);
+
if (!bdev)
return NULL;
- gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL);
+ gpt = kzalloc(ssz, GFP_KERNEL);
if (!gpt)
return NULL;
- if (read_lba(bdev, lba, (u8 *) gpt,
- sizeof (gpt_header)) < sizeof (gpt_header)) {
+ if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) {
kfree(gpt);
gpt=NULL;
return NULL;
@@ -601,6 +607,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
gpt_header *gpt = NULL;
gpt_entry *ptes = NULL;
u32 i;
+ unsigned ssz = bdev_logical_block_size(bdev) / 512;
if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) {
kfree(gpt);
@@ -611,13 +618,14 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
pr_debug("GUID Partition Table is valid! Yea!\n");
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
+ u64 start = le64_to_cpu(ptes[i].starting_lba);
+ u64 size = le64_to_cpu(ptes[i].ending_lba) -
+ le64_to_cpu(ptes[i].starting_lba) + 1ULL;
+
if (!is_pte_valid(&ptes[i], last_lba(bdev)))
continue;
- put_partition(state, i+1, le64_to_cpu(ptes[i].starting_lba),
- (le64_to_cpu(ptes[i].ending_lba) -
- le64_to_cpu(ptes[i].starting_lba) +
- 1ULL));
+ put_partition(state, i+1, start * ssz, size * ssz);
/* If this is a RAID volume, tell md */
if (!efi_guidcmp(ptes[i].partition_type_guid,
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index 2cc89d0475bf..6998b589abf9 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -37,7 +37,6 @@
#define EFI_PMBR_OSTYPE_EFI 0xEF
#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
-#define GPT_BLOCK_SIZE 512
#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
#define GPT_HEADER_REVISION_V1 0x00010000
#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
@@ -79,7 +78,12 @@ typedef struct _gpt_header {
__le32 num_partition_entries;
__le32 sizeof_partition_entry;
__le32 partition_entry_array_crc32;
- u8 reserved2[GPT_BLOCK_SIZE - 92];
+
+ /* The rest of the logical block is reserved by UEFI and must be zero.
+ * EFI standard handles this by:
+ *
+ * uint8_t reserved2[ BlockSize - 92 ];
+ */
} __attribute__ ((packed)) gpt_header;
typedef struct _gpt_entry_attributes {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f667e8aeabdf..6ff9981f0a18 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -48,7 +48,7 @@ out:
static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
{
int len;
- for ( ; p->ctl_name || p->procname; p++) {
+ for ( ; p->procname; p++) {
if (!p->procname)
continue;
@@ -218,7 +218,7 @@ static int scan(struct ctl_table_header *head, ctl_table *table,
void *dirent, filldir_t filldir)
{
- for (; table->ctl_name || table->procname; table++, (*pos)++) {
+ for (; table->procname; table++, (*pos)++) {
int res;
/* Can't do anything without a proc name */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9b6ad908dcb2..eb5a755718f6 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2404,100 +2404,89 @@ const struct quotactl_ops vfs_quotactl_ops = {
static ctl_table fs_dqstats_table[] = {
{
- .ctl_name = FS_DQ_LOOKUPS,
.procname = "lookups",
.data = &dqstats.lookups,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_DROPS,
.procname = "drops",
.data = &dqstats.drops,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_READS,
.procname = "reads",
.data = &dqstats.reads,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_WRITES,
.procname = "writes",
.data = &dqstats.writes,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_CACHE_HITS,
.procname = "cache_hits",
.data = &dqstats.cache_hits,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_ALLOCATED,
.procname = "allocated_dquots",
.data = &dqstats.allocated_dquots,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_FREE,
.procname = "free_dquots",
.data = &dqstats.free_dquots,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_DQ_SYNCS,
.procname = "syncs",
.data = &dqstats.syncs,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_PRINT_QUOTA_WARNING
{
- .ctl_name = FS_DQ_WARNINGS,
.procname = "warnings",
.data = &flag_print_warnings,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
- { .ctl_name = 0 },
+ { },
};
static ctl_table fs_table[] = {
{
- .ctl_name = FS_DQSTATS,
.procname = "quota",
.mode = 0555,
.child = fs_dqstats_table,
},
- { .ctl_name = 0 },
+ { },
};
static ctl_table sys_table[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = fs_table,
},
- { .ctl_name = 0 },
+ { },
};
static int __init dquot_init(void)
diff --git a/fs/read_write.c b/fs/read_write.c
index 3ac28987f22a..b7f4a1f94d48 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -826,8 +826,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
if (!(out_file->f_mode & FMODE_WRITE))
goto fput_out;
retval = -EINVAL;
- if (!out_file->f_op || !out_file->f_op->sendpage)
- goto fput_out;
in_inode = in_file->f_path.dentry->d_inode;
out_inode = out_file->f_path.dentry->d_inode;
retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 7c5ab6330dd6..6a9e30c041dd 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
hashes.o tail_conversion.o journal.o resize.o \
- item_ops.o ioctl.o procfs.o xattr.o
+ item_ops.o ioctl.o procfs.o xattr.o lock.o
ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e716161ab325..685495707181 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
else if (bitmap == 0)
block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
+ reiserfs_write_unlock(sb);
bh = sb_bread(sb, block);
+ reiserfs_write_lock(sb);
if (bh == NULL)
reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
"reading failed", __func__, block);
else {
if (buffer_locked(bh)) {
PROC_INFO_INC(sb, scan_bitmap.wait);
+ reiserfs_write_unlock(sb);
__wait_on_buffer(bh);
+ reiserfs_write_lock(sb);
}
BUG_ON(!buffer_uptodate(bh));
BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6d2668fdc384..c094f58c7448 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = {
.read = generic_read_dir,
.readdir = reiserfs_readdir,
.fsync = reiserfs_dir_fsync,
- .ioctl = reiserfs_ioctl,
+ .unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
#endif
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
// user space buffer is swapped out. At that time
// entry can move to somewhere else
memcpy(local_buf, d_name, d_reclen);
+
+ /*
+ * Since filldir might sleep, we can release
+ * the write lock here for other waiters
+ */
+ reiserfs_write_unlock(inode->i_sb);
if (filldir
(dirent, local_buf, d_reclen, d_off, d_ino,
DT_UNKNOWN) < 0) {
+ reiserfs_write_lock(inode->i_sb);
if (local_buf != small_buf) {
kfree(local_buf);
}
goto end;
}
+ reiserfs_write_lock(inode->i_sb);
if (local_buf != small_buf) {
kfree(local_buf);
}
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 128d3f7c8aa5..60c080440661 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,14 +21,6 @@
#include <linux/buffer_head.h>
#include <linux/kernel.h>
-#ifdef CONFIG_REISERFS_CHECK
-
-struct tree_balance *cur_tb = NULL; /* detects whether more than one
- copy of tb exists as a means
- of checking whether schedule
- is interrupting do_balance */
-#endif
-
static inline void buffer_info_init_left(struct tree_balance *tb,
struct buffer_info *bi)
{
@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb)
{
int retval = 0;
- if (cur_tb) {
+ if (REISERFS_SB(tb->tb_sb)->cur_tb) {
reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
"occurred based on cur_tb not being null at "
"this point in code. do_balance cannot properly "
- "handle schedule occurring while it runs.");
+ "handle concurrent tree accesses on a same "
+ "mount point.");
}
/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
"check");*/
RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
- cur_tb = tb;
+ REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}
@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb)
#ifdef CONFIG_REISERFS_CHECK
check_leaf_level(tb);
check_internal_levels(tb);
- cur_tb = NULL;
+ REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif
/* reiserfs_free_block is no longer schedule safe. So, we need to
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9f436668b7f8..da2dba082e2d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
const struct file_operations reiserfs_file_operations = {
.read = do_sync_read,
.write = reiserfs_file_write,
- .ioctl = reiserfs_ioctl,
+ .unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
#endif
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5e5a4e6fbaf8..d2f31330dcae 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
return needed_nodes;
}
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
/* Set parameters for balancing.
* Performs write of results of analysis of balancing into structure tb,
@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb,
/* Check whether the common parent is locked. */
if (buffer_locked(*pcom_father)) {
+
+ /* Release the write lock while the buffer is busy */
+ reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(*pcom_father);
+ reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb)) {
brelse(*pcom_father);
return REPEAT_SEARCH;
@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
return REPEAT_SEARCH;
if (buffer_locked(bh)) {
+ reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(bh);
+ reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
FL[h]);
son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
+ reiserfs_write_unlock(sb);
bh = sb_bread(sb, son_number);
+ reiserfs_write_lock(sb);
if (!bh)
return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
child_position =
(bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
+ reiserfs_write_unlock(sb);
bh = sb_bread(sb, son_number);
+ reiserfs_write_lock(sb);
if (!bh)
return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
REPEAT_SEARCH : CARRY_ON;
}
#endif
+ reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(locked);
+ reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
/* if it possible in indirect_to_direct conversion */
if (buffer_locked(tbS0)) {
+ reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(tbS0);
+ reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH;
}
#ifdef CONFIG_REISERFS_CHECK
- if (cur_tb) {
+ if (REISERFS_SB(tb->tb_sb)->cur_tb) {
print_cur_tb("fix_nodes");
reiserfs_panic(tb->tb_sb, "PAP-8305",
"there is pending do_balance");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a14d6cd9eeda..3a28e7751b3c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -251,7 +251,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
struct cpu_key key;
struct buffer_head *bh;
struct item_head *ih, tmp_ih;
- int fs_gen;
b_blocknr_t blocknr;
char *p = NULL;
int chars;
@@ -265,7 +264,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
(loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
3);
- research:
result = search_for_position_by_key(inode->i_sb, &key, &path);
if (result != POSITION_FOUND) {
pathrelse(&path);
@@ -340,7 +338,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
}
// read file tail into part of page
offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
- fs_gen = get_generation(inode->i_sb);
copy_item_head(&tmp_ih, ih);
/* we only want to kmap if we are reading the tail into the page.
@@ -348,13 +345,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
** sure we need to. But, this means the item might move if
** kmap schedules
*/
- if (!p) {
+ if (!p)
p = (char *)kmap(bh_result->b_page);
- if (fs_changed(fs_gen, inode->i_sb)
- && item_moved(&tmp_ih, &path)) {
- goto research;
- }
- }
+
p += offset;
memset(p, 0, inode->i_sb->s_blocksize);
do {
@@ -489,10 +482,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
disappeared */
if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
int err;
- lock_kernel();
+
+ reiserfs_write_lock(inode->i_sb);
+
err = reiserfs_commit_for_inode(inode);
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
- unlock_kernel();
+
+ reiserfs_write_unlock(inode->i_sb);
+
if (err < 0)
ret = err;
}
@@ -601,6 +598,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
__le32 *item;
int done;
int fs_gen;
+ int lock_depth;
struct reiserfs_transaction_handle *th = NULL;
/* space reserved in transaction batch:
. 3 balancings in direct->indirect conversion
@@ -616,12 +614,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
loff_t new_offset =
(((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
- /* bad.... */
- reiserfs_write_lock(inode->i_sb);
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
version = get_inode_item_key_version(inode);
if (!file_capable(inode, block)) {
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return -EFBIG;
}
@@ -633,7 +630,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
/* find number of block-th logical block of the file */
ret = _get_block_create_0(inode, block, bh_result,
create | GET_BLOCK_READ_DIRECT);
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return ret;
}
/*
@@ -751,7 +748,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (!dangle && th)
retval = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
/* the item was found, so new blocks were not added to the file
** there is no need to make sure the inode is updated with this
@@ -935,7 +932,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (blocks_needed == 1) {
un = &unf_single;
} else {
- un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling.
+ un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
if (!un) {
un = &unf_single;
blocks_needed = 1;
@@ -997,10 +994,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (retval)
goto failure;
}
- /* inserting indirect pointers for a hole can take a
- ** long time. reschedule if needed
+ /*
+ * inserting indirect pointers for a hole can take a
+ * long time. reschedule if needed and also release the write
+ * lock for others.
*/
- cond_resched();
+ if (need_resched()) {
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ schedule();
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ }
retval = search_for_position_by_key(inode->i_sb, &key, &path);
if (retval == IO_ERROR) {
@@ -1035,7 +1038,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
retval = err;
}
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
reiserfs_check_path(&path);
return retval;
}
@@ -2072,8 +2075,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
int error;
struct buffer_head *bh = NULL;
int err2;
+ int lock_depth;
- reiserfs_write_lock(inode->i_sb);
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
if (inode->i_size > 0) {
error = grab_tail_page(inode, &page, &bh);
@@ -2142,14 +2146,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
page_cache_release(page);
}
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+
return 0;
out:
if (page) {
unlock_page(page);
page_cache_release(page);
}
- reiserfs_write_unlock(inode->i_sb);
+
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+
return error;
}
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
int ret;
int old_ref = 0;
+ reiserfs_write_unlock(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
+
fix_tail_page_for_writing(page);
if (reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th;
@@ -2664,6 +2674,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
int update_sd = 0;
struct reiserfs_transaction_handle *th;
unsigned start;
+ int lock_depth = 0;
+ bool locked = false;
if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
pos ++;
@@ -2690,9 +2702,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
** to do the i_size updates here.
*/
pos += copied;
+
if (pos > inode->i_size) {
struct reiserfs_transaction_handle myth;
- reiserfs_write_lock(inode->i_sb);
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ locked = true;
/* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail
packing */
@@ -2703,10 +2717,9 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
ret = journal_begin(&myth, inode->i_sb, 1);
- if (ret) {
- reiserfs_write_unlock(inode->i_sb);
+ if (ret)
goto journal_error;
- }
+
reiserfs_update_inode_transaction(inode);
inode->i_size = pos;
/*
@@ -2718,34 +2731,36 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
reiserfs_update_sd(&myth, inode);
update_sd = 1;
ret = journal_end(&myth, inode->i_sb, 1);
- reiserfs_write_unlock(inode->i_sb);
if (ret)
goto journal_error;
}
if (th) {
- reiserfs_write_lock(inode->i_sb);
+ if (!locked) {
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
+ locked = true;
+ }
if (!update_sd)
mark_inode_dirty(inode);
ret = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock(inode->i_sb);
if (ret)
goto out;
}
out:
+ if (locked)
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
unlock_page(page);
page_cache_release(page);
return ret == 0 ? copied : ret;
journal_error:
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+ locked = false;
if (th) {
- reiserfs_write_lock(inode->i_sb);
if (!update_sd)
reiserfs_update_sd(th, inode);
ret = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock(inode->i_sb);
}
-
goto out;
}
@@ -2758,7 +2773,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
int update_sd = 0;
struct reiserfs_transaction_handle *th = NULL;
+ reiserfs_write_unlock(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb);
+ reiserfs_write_lock(inode->i_sb);
+
if (reiserfs_transaction_running(inode->i_sb)) {
th = current->journal_info;
}
@@ -2770,7 +2788,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
*/
if (pos > inode->i_size) {
struct reiserfs_transaction_handle myth;
- reiserfs_write_lock(inode->i_sb);
/* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail
packing */
@@ -2781,10 +2798,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
ret = journal_begin(&myth, inode->i_sb, 1);
- if (ret) {
- reiserfs_write_unlock(inode->i_sb);
+ if (ret)
goto journal_error;
- }
+
reiserfs_update_inode_transaction(inode);
inode->i_size = pos;
/*
@@ -2796,16 +2812,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
reiserfs_update_sd(&myth, inode);
update_sd = 1;
ret = journal_end(&myth, inode->i_sb, 1);
- reiserfs_write_unlock(inode->i_sb);
if (ret)
goto journal_error;
}
if (th) {
- reiserfs_write_lock(inode->i_sb);
if (!update_sd)
mark_inode_dirty(inode);
ret = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock(inode->i_sb);
if (ret)
goto out;
}
@@ -2815,11 +2828,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
journal_error:
if (th) {
- reiserfs_write_lock(inode->i_sb);
if (!update_sd)
reiserfs_update_sd(th, inode);
ret = reiserfs_end_persistent_transaction(th);
- reiserfs_write_unlock(inode->i_sb);
}
return ret;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0ccc3fdda7bf..ace77451ceb1 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -13,44 +13,52 @@
#include <linux/compat.h>
/*
-** reiserfs_ioctl - handler for ioctl for inode
-** supported commands:
-** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
-** and prevent packing file (argument arg has to be non-zero)
-** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
-** 3) That's all for a while ...
-*/
-int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
- unsigned long arg)
+ * reiserfs_ioctl - handler for ioctl for inode
+ * supported commands:
+ * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
+ * and prevent packing file (argument arg has to be non-zero)
+ * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
+ * 3) That's all for a while ...
+ */
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ struct inode *inode = filp->f_path.dentry->d_inode;
unsigned int flags;
int err = 0;
+ reiserfs_write_lock(inode->i_sb);
+
switch (cmd) {
case REISERFS_IOC_UNPACK:
if (S_ISREG(inode->i_mode)) {
if (arg)
- return reiserfs_unpack(inode, filp);
- else
- return 0;
+ err = reiserfs_unpack(inode, filp);
} else
- return -ENOTTY;
- /* following two cases are taken from fs/ext2/ioctl.c by Remy
- Card (card@masi.ibp.fr) */
+ err = -ENOTTY;
+ break;
+ /*
+ * following two cases are taken from fs/ext2/ioctl.c by Remy
+ * Card (card@masi.ibp.fr)
+ */
case REISERFS_IOC_GETFLAGS:
- if (!reiserfs_attrs(inode->i_sb))
- return -ENOTTY;
+ if (!reiserfs_attrs(inode->i_sb)) {
+ err = -ENOTTY;
+ break;
+ }
flags = REISERFS_I(inode)->i_attrs;
i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
- return put_user(flags, (int __user *)arg);
+ err = put_user(flags, (int __user *)arg);
+ break;
case REISERFS_IOC_SETFLAGS:{
- if (!reiserfs_attrs(inode->i_sb))
- return -ENOTTY;
+ if (!reiserfs_attrs(inode->i_sb)) {
+ err = -ENOTTY;
+ break;
+ }
err = mnt_want_write(filp->f_path.mnt);
if (err)
- return err;
+ break;
if (!is_owner_or_cap(inode)) {
err = -EPERM;
@@ -90,16 +98,19 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
mark_inode_dirty(inode);
setflags_out:
mnt_drop_write(filp->f_path.mnt);
- return err;
+ break;
}
case REISERFS_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
+ err = put_user(inode->i_generation, (int __user *)arg);
+ break;
case REISERFS_IOC_SETVERSION:
- if (!is_owner_or_cap(inode))
- return -EPERM;
+ if (!is_owner_or_cap(inode)) {
+ err = -EPERM;
+ break;
+ }
err = mnt_want_write(filp->f_path.mnt);
if (err)
- return err;
+ break;
if (get_user(inode->i_generation, (int __user *)arg)) {
err = -EFAULT;
goto setversion_out;
@@ -108,19 +118,20 @@ setflags_out:
mark_inode_dirty(inode);
setversion_out:
mnt_drop_write(filp->f_path.mnt);
- return err;
+ break;
default:
- return -ENOTTY;
+ err = -ENOTTY;
}
+
+ reiserfs_write_unlock(inode->i_sb);
+
+ return err;
}
#ifdef CONFIG_COMPAT
long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- int ret;
-
/* These are just misnamed, they actually get/put from/to user an int */
switch (cmd) {
case REISERFS_IOC32_UNPACK:
@@ -141,10 +152,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
default:
return -ENOIOCTLCMD;
}
- lock_kernel();
- ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
- unlock_kernel();
- return ret;
+
+ return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39c..2f8a7e7b8dab 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
clear_buffer_journal_restore_dirty(bh);
}
-/* utility function to force a BUG if it is called without the big
-** kernel lock held. caller is the string printed just before calling BUG()
-*/
-void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
-{
-#ifdef CONFIG_SMP
- if (current->lock_depth < 0) {
- reiserfs_panic(sb, "journal-1", "%s called without kernel "
- "lock held", caller);
- }
-#else
- ;
-#endif
-}
-
/* return a cnode with same dev, block number and size in table, or null if not found */
static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
super_block
@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
static inline void lock_journal(struct super_block *sb)
{
PROC_INFO_INC(sb, journal.lock_journal);
- mutex_lock(&SB_JOURNAL(sb)->j_mutex);
+
+ reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
}
/* unlock the current transaction */
@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s,
disable_barrier(s);
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
+ reiserfs_write_unlock(s);
sync_dirty_buffer(bh);
+ reiserfs_write_lock(s);
}
}
@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
{
DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s);
- if (atomic_read(&j->j_async_throttle))
+
+ if (atomic_read(&j->j_async_throttle)) {
+ reiserfs_write_unlock(s);
congestion_wait(BLK_RW_ASYNC, HZ / 10);
+ reiserfs_write_lock(s);
+ }
+
return 0;
}
@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s,
}
/* make sure nobody is trying to flush this one at the same time */
- mutex_lock(&jl->j_commit_mutex);
+ reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
if (!journal_list_still_alive(s, trans_id)) {
mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s,
if (!list_empty(&jl->j_bh_list)) {
int ret;
- unlock_kernel();
+
+ /*
+ * We might sleep in numerous places inside
+ * write_ordered_buffers. Relax the write lock.
+ */
+ reiserfs_write_unlock(s);
ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_bh_list);
if (ret < 0 && retval == 0)
retval = ret;
- lock_kernel();
+ reiserfs_write_lock(s);
}
BUG_ON(!list_empty(&jl->j_bh_list));
/*
@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s,
SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn);
if (tbh) {
- if (buffer_dirty(tbh))
- ll_rw_block(WRITE, 1, &tbh) ;
+ if (buffer_dirty(tbh)) {
+ reiserfs_write_unlock(s);
+ ll_rw_block(WRITE, 1, &tbh);
+ reiserfs_write_lock(s);
+ }
put_bh(tbh) ;
}
}
@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s,
bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn);
+
+ reiserfs_write_unlock(s);
wait_on_buffer(tbh);
+ reiserfs_write_lock(s);
// since we're using ll_rw_blk above, it might have skipped over
// a locked buffer. Double check here
//
- if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
+ /* redundant, sync_dirty_buffer() checks */
+ if (buffer_dirty(tbh)) {
+ reiserfs_write_unlock(s);
sync_dirty_buffer(tbh);
+ reiserfs_write_lock(s);
+ }
if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK
reiserfs_warning(s, "journal-601",
@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s,
if (buffer_dirty(jl->j_commit_bh))
BUG();
mark_buffer_dirty(jl->j_commit_bh) ;
+ reiserfs_write_unlock(s);
sync_dirty_buffer(jl->j_commit_bh) ;
+ reiserfs_write_lock(s);
}
- } else
+ } else {
+ reiserfs_write_unlock(s);
wait_on_buffer(jl->j_commit_bh);
+ reiserfs_write_lock(s);
+ }
check_barrier_completion(s, jl->j_commit_bh);
@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb,
if (trans_id >= journal->j_last_flush_trans_id) {
if (buffer_locked((journal->j_header_bh))) {
+ reiserfs_write_unlock(sb);
wait_on_buffer((journal->j_header_bh));
+ reiserfs_write_lock(sb);
if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb,
disable_barrier(sb);
goto sync;
}
+ reiserfs_write_unlock(sb);
wait_on_buffer(journal->j_header_bh);
+ reiserfs_write_lock(sb);
check_barrier_completion(sb, journal->j_header_bh);
} else {
sync:
set_buffer_dirty(journal->j_header_bh);
+ reiserfs_write_unlock(sb);
sync_dirty_buffer(journal->j_header_bh);
+ reiserfs_write_lock(sb);
}
if (!buffer_uptodate(journal->j_header_bh)) {
reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
/* if flushall == 0, the lock is already held */
if (flushall) {
- mutex_lock(&journal->j_flush_mutex);
+ reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
} else if (mutex_trylock(&journal->j_flush_mutex)) {
BUG();
}
@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s,
reiserfs_panic(s, "journal-1011",
"cn->bh is NULL");
}
+
+ reiserfs_write_unlock(s);
wait_on_buffer(cn->bh);
+ reiserfs_write_lock(s);
+
if (!cn->bh) {
reiserfs_panic(s, "journal-1012",
"cn->bh is NULL");
@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s,
struct reiserfs_journal *journal = SB_JOURNAL(s);
chunk.nr = 0;
- mutex_lock(&journal->j_flush_mutex);
+ reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
if (!journal_list_still_alive(s, orig_trans_id)) {
goto done;
}
@@ -1973,11 +1997,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
reiserfs_mounted_fs_count--;
/* wait for all commits to finish */
cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
+
+ /*
+ * We must release the write lock here because
+ * the workqueue job (flush_async_commit) needs this lock
+ */
+ reiserfs_write_unlock(sb);
flush_workqueue(commit_wq);
+
if (!reiserfs_mounted_fs_count) {
destroy_workqueue(commit_wq);
commit_wq = NULL;
}
+ reiserfs_write_lock(sb);
free_journal_ram(sb);
@@ -2243,7 +2275,11 @@ static int journal_read_transaction(struct super_block *sb,
/* read in the log blocks, memcpy to the corresponding real block */
ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
for (i = 0; i < get_desc_trans_len(desc); i++) {
+
+ reiserfs_write_unlock(sb);
wait_on_buffer(log_blocks[i]);
+ reiserfs_write_lock(sb);
+
if (!buffer_uptodate(log_blocks[i])) {
reiserfs_warning(sb, "journal-1212",
"REPLAY FAILURE fsck required! "
@@ -2765,11 +2801,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
+ /*
+ * We need to unlock here to avoid creating the following
+ * dependency:
+ * reiserfs_lock -> sysfs_mutex
+ * Because the reiserfs mmap path creates the following dependency:
+ * mm->mmap_sem -> reiserfs_lock, hence we have
+ * mm->mmap_sem -> reiserfs_lock -> sysfs_mutex
+ * This would end up in a circular dependency with the sysfs readdir path
+ * which does sysfs_mutex -> mm->mmap_sem
+ * This is fine because the reiserfs lock is useless in mount path,
+ * at least until we call journal_begin. We keep it for paranoid
+ * reasons.
+ */
+ reiserfs_write_unlock(sb);
if (journal_init_dev(sb, journal, j_dev_name) != 0) {
+ reiserfs_write_lock(sb);
reiserfs_warning(sb, "sh-462",
"unable to initialize jornal device");
goto free_and_return;
}
+ reiserfs_write_lock(sb);
rs = SB_DISK_SUPER_BLOCK(sb);
@@ -2881,8 +2933,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
}
reiserfs_mounted_fs_count++;
- if (reiserfs_mounted_fs_count <= 1)
+ if (reiserfs_mounted_fs_count <= 1) {
+ reiserfs_write_unlock(sb);
commit_wq = create_workqueue("reiserfs");
+ reiserfs_write_lock(sb);
+ }
INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
journal->j_work_sb = sb;
@@ -2964,8 +3019,11 @@ static void queue_log_writer(struct super_block *s)
init_waitqueue_entry(&wait, current);
add_wait_queue(&journal->j_join_wait, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
- if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
+ if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
+ reiserfs_write_unlock(s);
schedule();
+ reiserfs_write_lock(s);
+ }
__set_current_state(TASK_RUNNING);
remove_wait_queue(&journal->j_join_wait, &wait);
}
@@ -2982,7 +3040,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount;
while (1) {
+ reiserfs_write_unlock(sb);
schedule_timeout_uninterruptible(1);
+ reiserfs_write_lock(sb);
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3093,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
unlock_journal(sb);
+ reiserfs_write_unlock(sb);
reiserfs_wait_on_write_block(sb);
+ reiserfs_write_lock(sb);
PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock;
}
@@ -3506,14 +3568,14 @@ static void flush_async_commits(struct work_struct *work)
struct reiserfs_journal_list *jl;
struct list_head *entry;
- lock_kernel();
+ reiserfs_write_lock(sb);
if (!list_empty(&journal->j_journal_list)) {
/* last entry is the youngest, commit it and you get everything */
entry = journal->j_journal_list.prev;
jl = JOURNAL_LIST_ENTRY(entry);
flush_commit_list(sb, jl, 1);
}
- unlock_kernel();
+ reiserfs_write_unlock(sb);
}
/*
@@ -4041,7 +4103,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* the new transaction is fully setup, and we've already flushed the
* ordered bh list
*/
- mutex_lock(&jl->j_commit_mutex);
+ reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
/* save the transaction id in case we need to commit it later */
commit_trans_id = jl->j_trans_id;
@@ -4156,7 +4218,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
next = cn->next;
free_cnode(sb, cn);
cn = next;
+ reiserfs_write_unlock(sb);
cond_resched();
+ reiserfs_write_lock(sb);
}
/* we are done with both the c_bh and d_bh, but
@@ -4203,10 +4267,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* is lost.
*/
if (!list_empty(&jl->j_tail_bh_list)) {
- unlock_kernel();
+ reiserfs_write_unlock(sb);
write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_tail_bh_list);
- lock_kernel();
+ reiserfs_write_lock(sb);
}
BUG_ON(!list_empty(&jl->j_tail_bh_list));
mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
new file mode 100644
index 000000000000..ee2cfc0fd8a7
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,110 @@
+#include <linux/reiserfs_fs.h>
+#include <linux/mutex.h>
+
+/*
+ * The previous reiserfs locking scheme was heavily based on
+ * the tricky properties of the BKL (big kernel lock):
+ *
+ * - it could be acquired recursively by the same task
+ * - performance relied on the release-while-schedule() property
+ *
+ * Now that we replace it by a mutex, we still want to keep the same
+ * recursive property to avoid big changes in the code structure.
+ * We use our own lock_owner here because the owner field on a mutex
+ * is only available in SMP or mutex debugging, also we only need this field
+ * for this mutex, no need for a system wide mutex facility.
+ *
+ * Also this lock is often released before a call that could block because
+ * reiserfs performance was partially based on the release-while-schedule()
+ * property of the BKL.
+ */
+
+/*
+ * Take the per-superblock write lock, recursively.
+ *
+ * The mutex is acquired only if the current task does not already own it;
+ * lock_depth is incremented on every call, so each call must be paired
+ * with one reiserfs_write_unlock().
+ */
+void reiserfs_write_lock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+	}
+
+	/* No need to protect it, only the current task touches it */
+	sb_i->lock_depth++;
+}
+
+/*
+ * Drop one level of the write lock. The mutex itself is released only
+ * when lock_depth falls back to -1, i.e. on the outermost unlock.
+ */
+void reiserfs_write_unlock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	/*
+	 * Are we unlocking without even holding the lock?
+	 * Such a situation must raise a BUG() if we don't want
+	 * to corrupt the data.
+	 */
+	BUG_ON(sb_i->lock_owner != current);
+
+	if (--sb_i->lock_depth == -1) {
+		sb_i->lock_owner = NULL;
+		mutex_unlock(&sb_i->lock);
+	}
+}
+
+/*
+ * If we already own the lock, just exit and don't increase the depth.
+ * Useful when we don't want to lock more than once.
+ *
+ * We always return the lock_depth we had before calling this function
+ * (-1 when this call is the one that took the mutex). Hand the returned
+ * value to reiserfs_write_unlock_once() to undo exactly what was done here.
+ */
+int reiserfs_write_lock_once(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+		return sb_i->lock_depth++;
+	}
+
+	return sb_i->lock_depth;
+}
+
+/*
+ * Counterpart of reiserfs_write_lock_once(): release the lock only if the
+ * matching _once() call reported a previous depth of -1, i.e. it was the
+ * call that actually acquired the lock.
+ */
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
+{
+	if (lock_depth == -1)
+		reiserfs_write_unlock(s);
+}
+
+/*
+ * Utility function to force a BUG if it is called without the superblock
+ * write lock held. caller is the string printed just before calling BUG().
+ *
+ * reiserfs_panic() takes (sb, id, fmt, ...): pass an explicit id so the
+ * message is used as the format string, and supply the depth for "%d".
+ */
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	if (sb_i->lock_depth < 0)
+		reiserfs_panic(sb, "journal-1",
+			       "%s called without kernel lock held %d",
+			       caller, sb_i->lock_depth);
+}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 271579128634..e296ff72a6cc 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
int retval;
+ int lock_depth;
struct inode *inode = NULL;
struct reiserfs_dir_entry de;
INITIALIZE_PATH(path_to_entry);
@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
return ERR_PTR(-ENAMETOOLONG);
- reiserfs_write_lock(dir->i_sb);
+ /*
+ * Might be called with or without the write lock, must be careful
+ * to not recursively hold it in case we want to release the lock
+ * before rescheduling.
+ */
+ lock_depth = reiserfs_write_lock_once(dir->i_sb);
+
de.de_gen_number_bit_string = NULL;
retval =
reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
inode = reiserfs_iget(dir->i_sb,
(struct cpu_key *)&(de.de_dir_id));
if (!inode || IS_ERR(inode)) {
- reiserfs_write_unlock(dir->i_sb);
+ reiserfs_write_unlock_once(dir->i_sb, lock_depth);
return ERR_PTR(-EACCES);
}
@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_PRIVATE(dir))
inode->i_flags |= S_PRIVATE;
}
- reiserfs_write_unlock(dir->i_sb);
+ reiserfs_write_unlock_once(dir->i_sb, lock_depth);
if (retval == IO_ERROR) {
return ERR_PTR(-EIO);
}
@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct inode *inode;
struct reiserfs_transaction_handle th;
struct reiserfs_security_handle security;
+ int lock_depth;
/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
int jbegin_count =
JOURNAL_PER_BALANCE_CNT * 3 +
@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return retval;
}
jbegin_count += retval;
- reiserfs_write_lock(dir->i_sb);
+ lock_depth = reiserfs_write_lock_once(dir->i_sb);
retval = journal_begin(&th, dir->i_sb, jbegin_count);
if (retval) {
@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_instantiate(dentry, inode);
unlock_new_inode(inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
- out_failed:
- reiserfs_write_unlock(dir->i_sb);
+out_failed:
+ reiserfs_write_unlock_once(dir->i_sb, lock_depth);
return retval;
}
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 536eacaeb710..adbc6f538515 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
. */
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
-
void __reiserfs_panic(struct super_block *sb, const char *id,
const char *function, const char *fmt, ...)
{
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 18b315d3d104..b3a94d20f0fc 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
+ reiserfs_write_unlock(s);
sync_dirty_buffer(bh);
+ reiserfs_write_lock(s);
// update bitmap_info stuff
bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d036ee5b1c81..5fa7118f04e1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */
return ITEM_NOT_FOUND;
}
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
/* Minimal possible key. It is never in the tree. */
const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level)
#define SEARCH_BY_KEY_READA 16
-/* The function is NOT SCHEDULE-SAFE! */
-static void search_by_key_reada(struct super_block *s,
+/*
+ * The function is NOT SCHEDULE-SAFE!
+ * It might unlock the write lock if we need to wait for a block
+ * to be read. Note that in this case it won't retake the lock, to avoid
+ * high contention resulting from too many lock requests, especially
+ * since the caller (search_by_key) will perform other schedule-unsafe
+ * operations just after calling this function.
+ *
+ * @return true if we have unlocked
+ */
+static bool search_by_key_reada(struct super_block *s,
struct buffer_head **bh,
b_blocknr_t *b, int num)
{
int i, j;
+ bool unlocked = false;
for (i = 0; i < num; i++) {
bh[i] = sb_getblk(s, b[i]);
}
+ /*
+ * We are going to read some blocks on which we
+ * have a reference. It's safe, though we might be
+ * reading blocks concurrently changed if we release
+ * the lock. But it's still fine because we check later
+ * if the tree changed
+ */
for (j = 0; j < i; j++) {
/*
* note, this needs attention if we are getting rid of the BKL
* you have to make sure the prepared bit isn't set on this buffer
*/
- if (!buffer_uptodate(bh[j]))
+ if (!buffer_uptodate(bh[j])) {
+ if (!unlocked) {
+ reiserfs_write_unlock(s);
+ unlocked = true;
+ }
ll_rw_block(READA, 1, bh + j);
+ }
brelse(bh[j]);
}
+ return unlocked;
}
/**************************************************************************
@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
have a pointer to it. */
if ((bh = last_element->pe_buffer =
sb_getblk(sb, block_number))) {
+ bool unlocked = false;
+
if (!buffer_uptodate(bh) && reada_count > 1)
- search_by_key_reada(sb, reada_bh,
+ /* may unlock the write lock */
+ unlocked = search_by_key_reada(sb, reada_bh,
reada_blocks, reada_count);
+ /*
+ * If we haven't already unlocked the write lock,
+ * then we need to do that here before reading
+ * the current block
+ */
+ if (!buffer_uptodate(bh) && !unlocked) {
+ reiserfs_write_unlock(sb);
+ unlocked = true;
+ }
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
+
+ if (unlocked)
+ reiserfs_write_lock(sb);
if (!buffer_uptodate(bh))
goto io_error;
} else {
@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
!key_in_buffer(search_path, key, sb),
"PAP-5130: key is not in the buffer");
#ifdef CONFIG_REISERFS_CHECK
- if (cur_tb) {
+ if (REISERFS_SB(sb)->cur_tb) {
print_cur_tb("5140");
reiserfs_panic(sb, "PAP-5140",
"schedule occurred in do_balance!");
@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
reiserfs_free_block(th, inode, block, 1);
}
+ reiserfs_write_unlock(sb);
cond_resched();
+ reiserfs_write_lock(sb);
if (item_moved (&s_ih, path)) {
need_re_search = 1;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f0ad05f38022..339b0baf2af6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
struct reiserfs_transaction_handle th;
th.t_trans_id = 0;
- lock_kernel();
+ reiserfs_write_lock(s);
if (s->s_dirt)
reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
reiserfs_proc_info_done(s);
+ reiserfs_write_unlock(s);
+ mutex_destroy(&REISERFS_SB(s)->lock);
kfree(s->s_fs_info);
s->s_fs_info = NULL;
-
- unlock_kernel();
}
static struct kmem_cache *reiserfs_inode_cachep;
@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode)
struct reiserfs_transaction_handle th;
int err = 0;
+ int lock_depth;
+
if (inode->i_sb->s_flags & MS_RDONLY) {
reiserfs_warning(inode->i_sb, "clm-6006",
"writing inode %lu on readonly FS",
inode->i_ino);
return;
}
- reiserfs_write_lock(inode->i_sb);
+ lock_depth = reiserfs_write_lock_once(inode->i_sb);
/* this is really only used for atime updates, so they don't have
** to be included in O_SYNC or fsync
*/
err = journal_begin(&th, inode->i_sb, 1);
- if (err) {
- reiserfs_write_unlock(inode->i_sb);
- return;
- }
+ if (err)
+ goto out;
+
reiserfs_update_sd(&th, inode);
journal_end(&th, inode->i_sb, 1);
- reiserfs_write_unlock(inode->i_sb);
+
+out:
+ reiserfs_write_unlock_once(inode->i_sb, lock_depth);
}
#ifdef CONFIG_QUOTA
@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
unsigned int qfmt = 0;
#ifdef CONFIG_QUOTA
int i;
+#endif
+
+ reiserfs_write_lock(s);
+#ifdef CONFIG_QUOTA
memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif
- lock_kernel();
rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options
@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
out_ok:
replace_mount_options(s, new_opts);
- unlock_kernel();
+ reiserfs_write_unlock(s);
return 0;
out_err:
kfree(new_opts);
- unlock_kernel();
+ reiserfs_write_unlock(s);
return err;
}
@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset)
static int reread_meta_blocks(struct super_block *s)
{
ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
+ reiserfs_write_unlock(s);
wait_on_buffer(SB_BUFFER_WITH_SB(s));
+ reiserfs_write_lock(s);
if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
reiserfs_warning(s, "reiserfs-2504", "error reading the super");
return 1;
@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
if (!sbi) {
errval = -ENOMEM;
- goto error;
+ goto error_alloc;
}
s->s_fs_info = sbi;
/* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
/* setup default block allocator options */
reiserfs_init_alloc_options(s);
+ mutex_init(&REISERFS_SB(s)->lock);
+ REISERFS_SB(s)->lock_depth = -1;
+
+ /*
+ * This function is called with the BKL held, which was also the old
+ * locking scheme used here.
+ * do_journal_begin() will soon check that we hold the lock (formerly
+ * the BKL). That check is likely there for the sake of its several
+ * other callers, because at this point it doesn't seem necessary to
+ * protect against anything.
+ * Anyway, let's be conservative and lock for now.
+ */
+ reiserfs_write_lock(s);
+
jdev_name = NULL;
if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
init_waitqueue_head(&(sbi->s_wait));
spin_lock_init(&sbi->bitmap_lock);
+ reiserfs_write_unlock(s);
+
return (0);
error:
+ reiserfs_write_unlock(s);
+error_alloc:
if (jinit_done) { /* kill the commit thread, free journal ram */
journal_release_error(NULL, s);
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6925b835a43b..58aa8e75f7f5 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
int err = 0;
/* If we don't have the privroot located yet - go find it */
- mutex_lock(&s->s_root->d_inode->i_mutex);
+ reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
@@ -1004,14 +1004,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
goto error;
if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
- mutex_lock(&s->s_root->d_inode->i_mutex);
+ reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
err = create_privroot(REISERFS_SB(s)->priv_root);
mutex_unlock(&s->s_root->d_inode->i_mutex);
}
if (privroot->d_inode) {
s->s_xattr = reiserfs_xattr_handlers;
- mutex_lock(&privroot->d_inode->i_mutex);
+ reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/fs/splice.c b/fs/splice.c
index 7394e9e17534..39208663aaf1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -648,9 +648,11 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
ret = buf->ops->confirm(pipe, buf);
if (!ret) {
more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
-
- ret = file->f_op->sendpage(file, buf->page, buf->offset,
- sd->len, &pos, more);
+ if (file->f_op && file->f_op->sendpage)
+ ret = file->f_op->sendpage(file, buf->page, buf->offset,
+ sd->len, &pos, more);
+ else
+ ret = -EINVAL;
}
return ret;
@@ -1068,8 +1070,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
if (unlikely(ret < 0))
return ret;
- splice_write = out->f_op->splice_write;
- if (!splice_write)
+ if (out->f_op && out->f_op->splice_write)
+ splice_write = out->f_op->splice_write;
+ else
splice_write = default_file_splice_write;
return splice_write(pipe, out, ppos, len, flags);
@@ -1093,8 +1096,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(ret < 0))
return ret;
- splice_read = in->f_op->splice_read;
- if (!splice_read)
+ if (in->f_op && in->f_op->splice_read)
+ splice_read = in->f_op->splice_read;
+ else
splice_read = default_file_splice_read;
return splice_read(in, ppos, pipe, len, flags);
@@ -1316,7 +1320,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (out->f_op->llseek == no_llseek)
+ if (!out->f_op || !out->f_op->llseek ||
+ out->f_op->llseek == no_llseek)
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1336,7 +1341,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (in->f_op->llseek == no_llseek)
+ if (!in->f_op || !in->f_op->llseek ||
+ in->f_op->llseek == no_llseek)
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c2e30eea74dc..70f989895d15 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -904,16 +904,9 @@ xfs_convert_page(
if (startio) {
if (count) {
- struct backing_dev_info *bdi;
-
- bdi = inode->i_mapping->backing_dev_info;
wbc->nr_to_write--;
- if (bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- done = 1;
- } else if (wbc->nr_to_write <= 0) {
+ if (wbc->nr_to_write <= 0)
done = 1;
- }
}
xfs_start_page_writeback(page, !page_dirty, count);
}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index c5bc67c4e3bb..7bb5092d6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -55,170 +55,140 @@ xfs_stats_clear_proc_handler(
static ctl_table xfs_table[] = {
{
- .ctl_name = XFS_SGID_INHERIT,
.procname = "irix_sgid_inherit",
.data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max
},
{
- .ctl_name = XFS_SYMLINK_MODE,
.procname = "irix_symlink_mode",
.data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max
},
{
- .ctl_name = XFS_PANIC_MASK,
.procname = "panic_mask",
.data = &xfs_params.panic_mask.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.panic_mask.min,
.extra2 = &xfs_params.panic_mask.max
},
{
- .ctl_name = XFS_ERRLEVEL,
.procname = "error_level",
.data = &xfs_params.error_level.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.error_level.min,
.extra2 = &xfs_params.error_level.max
},
{
- .ctl_name = XFS_SYNCD_TIMER,
.procname = "xfssyncd_centisecs",
.data = &xfs_params.syncd_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.syncd_timer.min,
.extra2 = &xfs_params.syncd_timer.max
},
{
- .ctl_name = XFS_INHERIT_SYNC,
.procname = "inherit_sync",
.data = &xfs_params.inherit_sync.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.inherit_sync.min,
.extra2 = &xfs_params.inherit_sync.max
},
{
- .ctl_name = XFS_INHERIT_NODUMP,
.procname = "inherit_nodump",
.data = &xfs_params.inherit_nodump.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.inherit_nodump.min,
.extra2 = &xfs_params.inherit_nodump.max
},
{
- .ctl_name = XFS_INHERIT_NOATIME,
.procname = "inherit_noatime",
.data = &xfs_params.inherit_noatim.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.inherit_noatim.min,
.extra2 = &xfs_params.inherit_noatim.max
},
{
- .ctl_name = XFS_BUF_TIMER,
.procname = "xfsbufd_centisecs",
.data = &xfs_params.xfs_buf_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.xfs_buf_timer.min,
.extra2 = &xfs_params.xfs_buf_timer.max
},
{
- .ctl_name = XFS_BUF_AGE,
.procname = "age_buffer_centisecs",
.data = &xfs_params.xfs_buf_age.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.xfs_buf_age.min,
.extra2 = &xfs_params.xfs_buf_age.max
},
{
- .ctl_name = XFS_INHERIT_NOSYM,
.procname = "inherit_nosymlinks",
.data = &xfs_params.inherit_nosym.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.inherit_nosym.min,
.extra2 = &xfs_params.inherit_nosym.max
},
{
- .ctl_name = XFS_ROTORSTEP,
.procname = "rotorstep",
.data = &xfs_params.rotorstep.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.rotorstep.min,
.extra2 = &xfs_params.rotorstep.max
},
{
- .ctl_name = XFS_INHERIT_NODFRG,
.procname = "inherit_nodefrag",
.data = &xfs_params.inherit_nodfrg.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.inherit_nodfrg.min,
.extra2 = &xfs_params.inherit_nodfrg.max
},
{
- .ctl_name = XFS_FILESTREAM_TIMER,
.procname = "filestream_centisecs",
.data = &xfs_params.fstrm_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xfs_params.fstrm_timer.min,
.extra2 = &xfs_params.fstrm_timer.max,
},
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
- .ctl_name = XFS_STATS_CLEAR,
.procname = "stats_clear",
.data = &xfs_params.stats_clear.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &xfs_stats_clear_proc_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = xfs_stats_clear_proc_handler,
.extra1 = &xfs_params.stats_clear.min,
.extra2 = &xfs_params.stats_clear.max
},
@@ -229,7 +199,6 @@ static ctl_table xfs_table[] = {
static ctl_table xfs_dir_table[] = {
{
- .ctl_name = FS_XFS,
.procname = "xfs",
.mode = 0555,
.child = xfs_table
@@ -239,7 +208,6 @@ static ctl_table xfs_dir_table[] = {
static ctl_table xfs_root_table[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = xfs_dir_table