diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
commit | 36869cb93d36269f34800b3384ba7991060a69cf (patch) | |
tree | 1ff266dcb3386bb1403494aa89647a96fd2396cd /block/blk-wbt.h | |
parent | 9439b3710df688d853eb6cb4851256f2c92b1797 (diff) | |
parent | 7cd54aa8438947602cf68eda1db327822b9b8e6b (diff) | |
download | linux-36869cb93d36269f34800b3384ba7991060a69cf.tar.bz2 |
Merge branch 'for-4.10/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"This is the main block pull request this series. Contrary to previous
release, I've kept the core and driver changes in the same branch. We
always ended up having dependencies between the two for obvious
reasons, so makes more sense to keep them together. That said, I'll
probably try and keep more topical branches going forward, especially
for cycles that end up being as busy as this one.
The major parts of this pull request is:
- Improved support for O_DIRECT on block devices, with a small
private implementation instead of using the pig that is
fs/direct-io.c. From Christoph.
- Request completion tracking in a scalable fashion. This is utilized
by two components in this pull, the new hybrid polling and the
writeback queue throttling code.
- Improved support for polling with O_DIRECT, adding a hybrid mode
that combines pure polling with an initial sleep. From me.
- Support for automatic throttling of writeback queues on the block
side. This uses feedback from the device completion latencies to
scale the queue on the block side up or down. From me.
- Support from SMR drives in the block layer and for SD. From Hannes
and Shaun.
- Multi-connection support for nbd. From Josef.
- Cleanup of request and bio flags, so we have a clear split between
which are bio (or rq) private, and which ones are shared. From
Christoph.
- A set of patches from Bart, that improve how we handle queue
stopping and starting in blk-mq.
- Support for WRITE_ZEROES from Chaitanya.
- Lightnvm updates from Javier/Matias.
- Supoort for FC for the nvme-over-fabrics code. From James Smart.
- A bunch of fixes from a whole slew of people, too many to name
here"
* 'for-4.10/block' of git://git.kernel.dk/linux-block: (182 commits)
blk-stat: fix a few cases of missing batch flushing
blk-flush: run the queue when inserting blk-mq flush
elevator: make the rqhash helpers exported
blk-mq: abstract out blk_mq_dispatch_rq_list() helper
blk-mq: add blk_mq_start_stopped_hw_queue()
block: improve handling of the magic discard payload
blk-wbt: don't throttle discard or write zeroes
nbd: use dev_err_ratelimited in io path
nbd: reset the setup task for NBD_CLEAR_SOCK
nvme-fabrics: Add FC LLDD loopback driver to test FC-NVME
nvme-fabrics: Add target support for FC transport
nvme-fabrics: Add host support for FC transport
nvme-fabrics: Add FC transport LLDD api definitions
nvme-fabrics: Add FC transport FC-NVME definitions
nvme-fabrics: Add FC transport error codes to nvme.h
Add type 0x28 NVME type code to scsi fc headers
nvme-fabrics: patch target code in prep for FC transport support
nvme-fabrics: set sqe.command_id in core not transports
parser: add u64 number parser
nvme-rdma: align to generic ib_event logging helper
...
Diffstat (limited to 'block/blk-wbt.h')
-rw-r--r-- | block/blk-wbt.h | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/block/blk-wbt.h b/block/blk-wbt.h new file mode 100644 index 000000000000..65f1de519f67 --- /dev/null +++ b/block/blk-wbt.h @@ -0,0 +1,171 @@ +#ifndef WB_THROTTLE_H +#define WB_THROTTLE_H + +#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/wait.h> +#include <linux/timer.h> +#include <linux/ktime.h> + +#include "blk-stat.h" + +enum wbt_flags { + WBT_TRACKED = 1, /* write, tracked for throttling */ + WBT_READ = 2, /* read */ + WBT_KSWAPD = 4, /* write, from kswapd */ + + WBT_NR_BITS = 3, /* number of bits */ +}; + +enum { + WBT_NUM_RWQ = 2, +}; + +/* + * Enable states. Either off, or on by default (done at init time), + * or on through manual setup in sysfs. + */ +enum { + WBT_STATE_ON_DEFAULT = 1, + WBT_STATE_ON_MANUAL = 2, +}; + +static inline void wbt_clear_state(struct blk_issue_stat *stat) +{ + stat->time &= BLK_STAT_TIME_MASK; +} + +static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat) +{ + return (stat->time & BLK_STAT_MASK) >> BLK_STAT_SHIFT; +} + +static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct) +{ + stat->time |= ((u64) wb_acct) << BLK_STAT_SHIFT; +} + +static inline bool wbt_is_tracked(struct blk_issue_stat *stat) +{ + return (stat->time >> BLK_STAT_SHIFT) & WBT_TRACKED; +} + +static inline bool wbt_is_read(struct blk_issue_stat *stat) +{ + return (stat->time >> BLK_STAT_SHIFT) & WBT_READ; +} + +struct rq_wait { + wait_queue_head_t wait; + atomic_t inflight; +}; + +struct rq_wb { + /* + * Settings that govern how we throttle + */ + unsigned int wb_background; /* background writeback */ + unsigned int wb_normal; /* normal writeback */ + unsigned int wb_max; /* max throughput writeback */ + int scale_step; + bool scaled_max; + + short enable_state; /* WBT_STATE_* */ + + /* + * Number of consecutive periods where we don't have enough + * information to make a firm scale up/down decision. + */ + unsigned int unknown_cnt; + + u64 win_nsec; /* default window size */ + u64 cur_win_nsec; /* current window size */ + + struct timer_list window_timer; + + s64 sync_issue; + void *sync_cookie; + + unsigned int wc; + unsigned int queue_depth; + + unsigned long last_issue; /* last non-throttled issue */ + unsigned long last_comp; /* last non-throttled comp */ + unsigned long min_lat_nsec; + struct request_queue *queue; + struct rq_wait rq_wait[WBT_NUM_RWQ]; +}; + +static inline unsigned int wbt_inflight(struct rq_wb *rwb) +{ + unsigned int i, ret = 0; + + for (i = 0; i < WBT_NUM_RWQ; i++) + ret += atomic_read(&rwb->rq_wait[i].inflight); + + return ret; +} + +#ifdef CONFIG_BLK_WBT + +void __wbt_done(struct rq_wb *, enum wbt_flags); +void wbt_done(struct rq_wb *, struct blk_issue_stat *); +enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *); +int wbt_init(struct request_queue *); +void wbt_exit(struct request_queue *); +void wbt_update_limits(struct rq_wb *); +void wbt_requeue(struct rq_wb *, struct blk_issue_stat *); +void wbt_issue(struct rq_wb *, struct blk_issue_stat *); +void wbt_disable_default(struct request_queue *); + +void wbt_set_queue_depth(struct rq_wb *, unsigned int); +void wbt_set_write_cache(struct rq_wb *, bool); + +u64 wbt_default_latency_nsec(struct request_queue *); + +#else + +static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags) +{ +} +static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) +{ +} +static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, + spinlock_t *lock) +{ + return 0; +} +static inline int wbt_init(struct request_queue *q) +{ + return -EINVAL; +} +static inline void wbt_exit(struct request_queue *q) +{ +} +static inline void wbt_update_limits(struct rq_wb *rwb) +{ +} +static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat) +{ +} +static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat) +{ +} +static inline void wbt_disable_default(struct request_queue *q) +{ +} +static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth) +{ +} +static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc) +{ +} +static inline u64 wbt_default_latency_nsec(struct request_queue *q) +{ + return 0; +} + +#endif /* CONFIG_BLK_WBT */ + +#endif |