summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
commit694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /fs/ceph
parenta351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
downloadlinux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.bz2
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ was initially a fork of CFQ, but subsequently changed to implement fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant to be used on desktop type single drives, providing good fairness. From Paolo. - Add Kyber IO scheduler. This is a full multiqueue aware scheduler, using a scalable token based algorithm that throttles IO based on live completion IO stats, similary to blk-wbt. From Omar. - A series from Jan, moving users to separately allocated backing devices. This continues the work of separating backing device life times, solving various problems with hot removal. - A series of updates for lightnvm, mostly from Javier. Includes a 'pblk' target that exposes an open channel SSD as a physical block device. - A series of fixes and improvements for nbd from Josef. - A series from Omar, removing queue sharing between devices on mostly legacy drivers. This helps us clean up other bits, if we know that a queue only has a single device backing. This has been overdue for more than a decade. - Fixes for the blk-stats, and improvements to unify the stats and user windows. This both improves blk-wbt, and enables other users to register a need to receive IO stats for a device. From Omar. - blk-throttle improvements from Shaohua. This provides a scalable framework for implementing scalable priotization - particularly for blk-mq, but applicable to any type of block device. The interface is marked experimental for now. - Bucketized IO stats for IO polling from Stephen Bates. This improves efficiency of polled workloads in the presence of mixed block size IO. - A few fixes for opal, from Scott. - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics. From a variety of folks, mostly Sagi and James Smart. - A series from Bart, improving our exposed info and capabilities from the blk-mq debugfs support. - A series from Christoph, cleaning up how handle WRITE_ZEROES. - A series from Christoph, cleaning up the block layer handling of how we track errors in a request. On top of being a nice cleanup, it also shrinks the size of struct request a bit. - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was never used by platforms, and the latter has outlived it's usefulness. - Various little bug fixes and cleanups from a wide variety of folks. * 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits) block: hide badblocks attribute by default blk-mq: unify hctx delay_work and run_work block: add kblock_mod_delayed_work_on() blk-mq: unify hctx delayed_run_work and run_work nbd: fix use after free on module unload MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler blk-mq-sched: alloate reserved tags out of normal pool mtip32xx: use runtime tag to initialize command header scsi: Implement blk_mq_ops.show_rq() blk-mq: Add blk_mq_ops.show_rq() blk-mq: Show operation, cmd_flags and rq_flags names blk-mq: Make blk_flags_show() callers append a newline character blk-mq: Move the "state" debugfs attribute one level down blk-mq: Unregister debugfs attributes earlier blk-mq: Only unregister hctxs for which registration succeeded blk-mq-debugfs: Rename functions for registering and unregistering the mq directory blk-mq: Let blk_mq_debugfs_register() look up the queue name blk-mq: Register <dev>/queue/mq after having registered <dev>/queue ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset ide-pm: always pass 0 error to __blk_end_request_all ..
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/super.c35
-rw-r--r--fs/ceph/super.h2
4 files changed, 17 insertions, 28 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1a3e1b40799a..9ecb2fd348cb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -578,7 +578,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
if (writeback_stat >
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
- set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
+ set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
set_page_writeback(page);
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
@@ -700,7 +700,7 @@ static void writepages_finish(struct ceph_osd_request *req)
if (atomic_long_dec_return(&fsc->writeback_count) <
CONGESTION_OFF_THRESH(
fsc->mount_options->congestion_kb))
- clear_bdi_congested(&fsc->backing_dev_info,
+ clear_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
if (rc < 0)
@@ -979,7 +979,7 @@ get_more_pages:
if (atomic_long_inc_return(&fsc->writeback_count) >
CONGESTION_ON_THRESH(
fsc->mount_options->congestion_kb)) {
- set_bdi_congested(&fsc->backing_dev_info,
+ set_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f2ae393e2c31..3ef11bc8d728 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -251,7 +251,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
goto out;
snprintf(name, sizeof(name), "../../bdi/%s",
- dev_name(fsc->backing_dev_info.dev));
+ dev_name(fsc->sb->s_bdi->dev));
fsc->debugfs_bdi =
debugfs_create_symlink("bdi",
fsc->client->debugfs_dir,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 0ec8d0114e57..a8c81b2052ca 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -579,10 +579,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
atomic_long_set(&fsc->writeback_count, 0);
- err = bdi_init(&fsc->backing_dev_info);
- if (err < 0)
- goto fail_client;
-
err = -ENOMEM;
/*
* The number of concurrent works can be high but they don't need
@@ -590,7 +586,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
*/
fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
if (fsc->wb_wq == NULL)
- goto fail_bdi;
+ goto fail_client;
fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
if (fsc->pg_inv_wq == NULL)
goto fail_wb_wq;
@@ -624,8 +620,6 @@ fail_pg_inv_wq:
destroy_workqueue(fsc->pg_inv_wq);
fail_wb_wq:
destroy_workqueue(fsc->wb_wq);
-fail_bdi:
- bdi_destroy(&fsc->backing_dev_info);
fail_client:
ceph_destroy_client(fsc->client);
fail:
@@ -643,8 +637,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
destroy_workqueue(fsc->pg_inv_wq);
destroy_workqueue(fsc->trunc_wq);
- bdi_destroy(&fsc->backing_dev_info);
-
mempool_destroy(fsc->wb_pagevec_pool);
destroy_mount_options(fsc->mount_options);
@@ -937,33 +929,32 @@ static int ceph_compare_super(struct super_block *sb, void *data)
*/
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-static int ceph_register_bdi(struct super_block *sb,
- struct ceph_fs_client *fsc)
+static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
{
int err;
+ err = super_setup_bdi_name(sb, "ceph-%ld",
+ atomic_long_inc_return(&bdi_seq));
+ if (err)
+ return err;
+
/* set ra_pages based on rasize mount option? */
if (fsc->mount_options->rasize >= PAGE_SIZE)
- fsc->backing_dev_info.ra_pages =
+ sb->s_bdi->ra_pages =
(fsc->mount_options->rasize + PAGE_SIZE - 1)
>> PAGE_SHIFT;
else
- fsc->backing_dev_info.ra_pages =
- VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+ sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
fsc->mount_options->rsize >= PAGE_SIZE)
- fsc->backing_dev_info.io_pages =
+ sb->s_bdi->io_pages =
(fsc->mount_options->rsize + PAGE_SIZE - 1)
>> PAGE_SHIFT;
else if (fsc->mount_options->rsize == 0)
- fsc->backing_dev_info.io_pages = ULONG_MAX;
+ sb->s_bdi->io_pages = ULONG_MAX;
- err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
- atomic_long_inc_return(&bdi_seq));
- if (!err)
- sb->s_bdi = &fsc->backing_dev_info;
- return err;
+ return 0;
}
static struct dentry *ceph_mount(struct file_system_type *fs_type,
@@ -1018,7 +1009,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
dout("get_sb got existing client %p\n", fsc);
} else {
dout("get_sb using new client %p\n", fsc);
- err = ceph_register_bdi(sb, fsc);
+ err = ceph_setup_bdi(sb, fsc);
if (err < 0) {
res = ERR_PTR(err);
goto out_splat;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fe6b9cfc4013..176186b12457 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -92,8 +92,6 @@ struct ceph_fs_client {
struct workqueue_struct *trunc_wq;
atomic_long_t writeback_count;
- struct backing_dev_info backing_dev_info;
-
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_dentry_lru, *debugfs_caps;
struct dentry *debugfs_congestion_kb;