summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /include
parentabc36be236358162202e86ad88616ff95a755101 (diff)
parenta04b5de5050ab8b891128eb2c47a0916fe8622e1 (diff)
downloadlinux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.bz2
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the whole on blk/scsi-mq queue quescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fix the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tupple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enable us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). - blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev-defs.h24
-rw-r--r--include/linux/backing-dev.h4
-rw-r--r--include/linux/bio.h25
-rw-r--r--include/linux/blk-cgroup.h25
-rw-r--r--include/linux/blk-mq.h40
-rw-r--r--include/linux/blk_types.h16
-rw-r--r--include/linux/blkdev.h36
-rw-r--r--include/linux/buffer_head.h2
-rw-r--r--include/linux/elevator.h1
-rw-r--r--include/linux/genhd.h4
-rw-r--r--include/linux/kthread.h11
-rw-r--r--include/linux/lightnvm.h11
-rw-r--r--include/linux/nvme-fc-driver.h15
-rw-r--r--include/linux/nvme.h30
-rw-r--r--include/linux/sbitmap.h64
-rw-r--r--include/linux/writeback.h30
-rw-r--r--include/scsi/scsi_device.h1
-rw-r--r--include/trace/events/writeback.h1
18 files changed, 201 insertions, 139 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff4cfa0c21d..bfe86b54f6c1 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -25,6 +25,7 @@ enum wb_state {
WB_shutting_down, /* wb_shutdown() in progress */
WB_writeback_running, /* Writeback is in progress */
WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */
+ WB_start_all, /* nr_pages == 0 (all) work pending */
};
enum wb_congested_state {
@@ -45,6 +46,28 @@ enum wb_stat_item {
#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
/*
+ * why some writeback work was initiated
+ */
+enum wb_reason {
+ WB_REASON_BACKGROUND,
+ WB_REASON_VMSCAN,
+ WB_REASON_SYNC,
+ WB_REASON_PERIODIC,
+ WB_REASON_LAPTOP_TIMER,
+ WB_REASON_FREE_MORE_MEM,
+ WB_REASON_FS_FREE_SPACE,
+ /*
+ * There is no bdi forker thread any more and works are done
+ * by emergency worker, however, this is TPs userland visible
+ * and we'll be exposing exactly the same information,
+ * so it has a mismatch name.
+ */
+ WB_REASON_FORKER_THREAD,
+
+ WB_REASON_MAX,
+};
+
+/*
* For cgroup writeback, multiple wb's may map to the same blkcg. Those
* wb's can operate mostly independently but should share the congested
* state. To facilitate such sharing, the congested state is tracked using
@@ -116,6 +139,7 @@ struct bdi_writeback {
struct fprop_local_percpu completions;
int dirty_exceeded;
+ enum wb_reason start_all_reason;
spinlock_t work_lock; /* protects work_list & dwork scheduling */
struct list_head work_list;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 16621579a3db..f41ca8486e02 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -39,8 +39,6 @@ static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
}
-void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
- bool range_cyclic, enum wb_reason reason);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);
@@ -175,8 +173,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
-int pdflush_proc_obsolete(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 275c91c99516..d4eec19a6d3c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -129,18 +129,6 @@ static inline void *bio_data(struct bio *bio)
#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
/*
- * queues that have highmem support enabled may still need to revert to
- * PIO transfers occasionally and thus map high pages temporarily. For
- * permanent PIO fall back, user is probably better off disabling highmem
- * I/O completely on that queue (see ide-dma for example)
- */
-#define __bio_kmap_atomic(bio, iter) \
- (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \
- bio_iter_iovec((bio), (iter)).bv_offset)
-
-#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
-
-/*
* merge helpers etc
*/
@@ -522,13 +510,11 @@ do { \
#ifdef CONFIG_BLK_CGROUP
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
-int bio_associate_current(struct bio *bio);
void bio_disassociate_task(struct bio *bio);
void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
static inline int bio_associate_blkcg(struct bio *bio,
struct cgroup_subsys_state *blkcg_css) { return 0; }
-static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
static inline void bio_disassociate_task(struct bio *bio) { }
static inline void bio_clone_blkcg_association(struct bio *dst,
struct bio *src) { }
@@ -575,17 +561,6 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
}
#endif
-static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
- unsigned long *flags)
-{
- return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
-}
-#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
-
-#define bio_kmap_irq(bio, flags) \
- __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
-#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
-
/*
* BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
*
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 8bbc3716507a..e9825ff57b15 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -20,6 +20,7 @@
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
+#include <linux/kthread.h>
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
@@ -224,22 +225,16 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
return css ? container_of(css, struct blkcg, css) : NULL;
}
-static inline struct blkcg *task_blkcg(struct task_struct *tsk)
-{
- return css_to_blkcg(task_css(tsk, io_cgrp_id));
-}
-
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
+ struct cgroup_subsys_state *css;
+
if (bio && bio->bi_css)
return css_to_blkcg(bio->bi_css);
- return task_blkcg(current);
-}
-
-static inline struct cgroup_subsys_state *
-task_get_blkcg_css(struct task_struct *task)
-{
- return task_get_css(task, io_cgrp_id);
+ css = kthread_blkcg();
+ if (css)
+ return css_to_blkcg(css);
+ return css_to_blkcg(task_css(current, io_cgrp_id));
}
/**
@@ -736,12 +731,6 @@ struct blkcg_policy {
#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
-static inline struct cgroup_subsys_state *
-task_get_blkcg_css(struct task_struct *task)
-{
- return NULL;
-}
-
#ifdef CONFIG_BLOCK
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 994cbb0f7ffc..95c9a5c862e2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -31,10 +31,12 @@ struct blk_mq_hw_ctx {
struct sbitmap ctx_map;
+ struct blk_mq_ctx *dispatch_from;
+
struct blk_mq_ctx **ctxs;
unsigned int nr_ctx;
- wait_queue_entry_t dispatch_wait;
+ wait_queue_entry_t dispatch_wait;
atomic_t wait_index;
struct blk_mq_tags *tags;
@@ -91,6 +93,8 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *);
+typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
+typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -113,6 +117,15 @@ struct blk_mq_ops {
queue_rq_fn *queue_rq;
/*
+ * Reserve budget before queue request, once .queue_rq is
+ * run, it is driver's responsibility to release the
+ * reserved budget. Also we have to handle failure case
+ * of .get_budget for avoiding I/O deadlock.
+ */
+ get_budget_fn *get_budget;
+ put_budget_fn *put_budget;
+
+ /*
* Called on request timeout
*/
timeout_fn *timeout;
@@ -169,8 +182,7 @@ enum {
BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
- BLK_MQ_S_TAG_WAITING = 3,
- BLK_MQ_S_START_ON_RUN = 4,
+ BLK_MQ_S_START_ON_RUN = 3,
BLK_MQ_MAX_DEPTH = 10240,
@@ -198,15 +210,21 @@ void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
enum {
- BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
- BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
- BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
+ /* return when out of requests */
+ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0),
+ /* allocate from reserved pool */
+ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1),
+ /* allocate internal/sched tag */
+ BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2),
+ /* set RQF_PREEMPT */
+ BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3),
};
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
- unsigned int flags);
+ blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
- unsigned int op, unsigned int flags, unsigned int hctx_idx);
+ unsigned int op, blk_mq_req_flags_t flags,
+ unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
enum {
@@ -249,7 +267,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
@@ -260,8 +278,8 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
-int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
- int (reinit_request)(void *, struct request *));
+int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
+ int (reinit_request)(void *, struct request *));
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 96ac3815542c..a1e628e032da 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -163,6 +163,8 @@ struct bio {
*/
#define BIO_RESET_BITS BVEC_POOL_OFFSET
+typedef __u32 __bitwise blk_mq_req_flags_t;
+
/*
* Operations and flags common to the bio and request structures.
* We use 8 bits for encoding the operation, and the remaining 24 for flags.
@@ -225,11 +227,14 @@ enum req_flag_bits {
__REQ_PREFLUSH, /* request for cache flush */
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
+ __REQ_NOWAIT, /* Don't wait if request will block */
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
- __REQ_NOWAIT, /* Don't wait if request will block */
+ /* for driver use */
+ __REQ_DRV,
+
__REQ_NR_BITS, /* stops here */
};
@@ -246,9 +251,11 @@ enum req_flag_bits {
#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
+#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
-#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
+
+#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -330,11 +337,10 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
}
struct blk_rq_stat {
- s64 mean;
+ u64 mean;
u64 min;
u64 max;
- s32 nr_samples;
- s32 nr_batch;
+ u32 nr_samples;
u64 batch;
};
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8da66379f7ea..8089ca17db9a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -267,6 +267,7 @@ struct blk_queue_ctx;
typedef void (request_fn_proc) (struct request_queue *q);
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
@@ -409,6 +410,7 @@ struct request_queue {
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
+ poll_q_fn *poll_fn;
prep_rq_fn *prep_rq_fn;
unprep_rq_fn *unprep_rq_fn;
softirq_done_fn *softirq_done_fn;
@@ -610,7 +612,6 @@ struct request_queue {
#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */
-#define QUEUE_FLAG_STACKABLE 8 /* supports request stacking */
#define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */
@@ -632,14 +633,13 @@ struct request_queue {
#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */
#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
+#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_ADD_RANDOM))
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_POLL))
@@ -723,8 +723,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_stackable(q) \
- test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
@@ -736,6 +734,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
+#define blk_queue_preempt_only(q) \
+ test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+
+extern int blk_set_preempt_only(struct request_queue *q);
+extern void blk_clear_preempt_only(struct request_queue *q);
static inline bool blk_account_rq(struct request *rq)
{
@@ -923,24 +926,17 @@ static inline void rq_flush_dcache_pages(struct request *rq)
}
#endif
-#ifdef CONFIG_PRINTK
-#define vfs_msg(sb, level, fmt, ...) \
- __vfs_msg(sb, level, fmt, ##__VA_ARGS__)
-#else
-#define vfs_msg(sb, level, fmt, ...) \
-do { \
- no_printk(fmt, ##__VA_ARGS__); \
- __vfs_msg(sb, "", " "); \
-} while (0)
-#endif
-
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
extern blk_qc_t generic_make_request(struct bio *bio);
+extern blk_qc_t direct_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
extern void __blk_put_request(struct request_queue *, struct request *);
+extern struct request *blk_get_request_flags(struct request_queue *,
+ unsigned int op,
+ blk_mq_req_flags_t flags);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
gfp_t gfp_mask);
extern void blk_requeue_request(struct request_queue *, struct request *);
@@ -964,7 +960,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
struct scsi_ioctl_command __user *);
-extern int blk_queue_enter(struct request_queue *q, bool nowait);
+extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
extern void blk_queue_exit(struct request_queue *q);
extern void blk_start_queue(struct request_queue *q);
extern void blk_start_queue_async(struct request_queue *q);
@@ -991,7 +987,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
-bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
+bool blk_poll(struct request_queue *q, blk_qc_t cookie);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
@@ -1110,6 +1106,8 @@ extern struct request *blk_peek_request(struct request_queue *q);
extern void blk_start_request(struct request *rq);
extern struct request *blk_fetch_request(struct request_queue *q);
+void blk_steal_bios(struct bio_list *list, struct request *rq);
+
/*
* Request completion related functions.
*
@@ -1372,7 +1370,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
gfp_mask, 0);
}
-extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
+extern int blk_verify_command(unsigned char *cmd, fmode_t mode);
enum blk_default_limits {
BLK_MAX_SEGMENTS = 128,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index afa37f807f12..8b1bf8d3d4a2 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -157,7 +157,7 @@ void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset);
int try_to_free_buffers(struct page *);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- int retry);
+ bool retry);
void create_empty_buffers(struct page *, unsigned long,
unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ddb7632d73b9..3d794b3dc532 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -145,6 +145,7 @@ struct elevator_type
size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
+ const char *elevator_alias;
struct module *elevator_owner;
bool uses_mq;
#ifdef CONFIG_BLK_DEBUG_FS
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index eaefb7a62f83..5144ebe046c9 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -141,6 +141,7 @@ struct hd_struct {
#define GENHD_FL_NATIVE_CAPACITY 128
#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256
#define GENHD_FL_NO_PART_SCAN 512
+#define GENHD_FL_HIDDEN 1024
enum {
DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
@@ -236,7 +237,7 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk)
static inline dev_t disk_devt(struct gendisk *disk)
{
- return disk_to_dev(disk)->devt;
+ return MKDEV(disk->major, disk->first_minor);
}
static inline dev_t part_devt(struct hd_struct *part)
@@ -244,6 +245,7 @@ static inline dev_t part_devt(struct hd_struct *part)
return part_to_dev(part)->devt;
}
+extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
static inline void disk_put_part(struct hd_struct *part)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 86d53a3cb497..3203e36b2ee8 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,6 +4,7 @@
/* Simple interface for creating and stopping kernel threads without mess. */
#include <linux/err.h>
#include <linux/sched.h>
+#include <linux/cgroup.h>
__printf(4, 5)
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
@@ -199,4 +200,14 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work);
void kthread_destroy_worker(struct kthread_worker *worker);
+#ifdef CONFIG_BLK_CGROUP
+void kthread_associate_blkcg(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *kthread_blkcg(void);
+#else
+static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
+static inline struct cgroup_subsys_state *kthread_blkcg(void)
+{
+ return NULL;
+}
+#endif
#endif /* _LINUX_KTHREAD_H */
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index a29a8db5cc2f..2d1d9de06728 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -57,6 +57,7 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@ -70,6 +71,7 @@ struct nvm_dev_ops {
nvm_op_set_bb_fn *set_bb_tbl;
nvm_submit_io_fn *submit_io;
+ nvm_submit_io_sync_fn *submit_io_sync;
nvm_create_dma_pool_fn *create_dma_pool;
nvm_destroy_dma_pool_fn *destroy_dma_pool;
@@ -461,10 +463,9 @@ struct nvm_tgt_type {
/* For internal use */
struct list_head list;
+ struct module *owner;
};
-extern struct nvm_tgt_type *nvm_find_target_type(const char *, int);
-
extern int nvm_register_tgt_type(struct nvm_tgt_type *);
extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
@@ -479,10 +480,8 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
int, int);
extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
+extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
-extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *,
- const struct ppa_addr *, int, int);
-extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *);
extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
void *);
extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
@@ -491,8 +490,6 @@ extern void nvm_end_io(struct nvm_rq *);
extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
-extern int nvm_dev_factory(struct nvm_dev *, int flags);
-
extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
#else /* CONFIG_NVM */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index a726f96010d5..496ff759f84c 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -40,6 +40,8 @@
* @node_name: FC WWNN for the port
* @port_name: FC WWPN for the port
* @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
+ * @dev_loss_tmo: maximum delay for reconnects to an association on
+ * this device. Used only on a remoteport.
*
* Initialization values for dynamic port fields:
* @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
@@ -50,6 +52,7 @@ struct nvme_fc_port_info {
u64 port_name;
u32 port_role;
u32 port_id;
+ u32 dev_loss_tmo;
};
@@ -102,8 +105,6 @@ enum nvmefc_fcp_datadir {
};
-#define NVME_FC_MAX_SEGMENTS 256
-
/**
* struct nvmefc_fcp_req - Request structure passed from NVME-FC transport
* to LLDD in order to perform a NVME FCP IO operation.
@@ -202,6 +203,9 @@ enum nvme_fc_obj_state {
* The length of the buffer corresponds to the local_priv_sz
* value specified in the nvme_fc_port_template supplied by
* the LLDD.
+ * @dev_loss_tmo: maximum delay for reconnects to an association on
+ * this device. To modify, lldd must call
+ * nvme_fc_set_remoteport_devloss().
*
* Fields with dynamic values. Values may change base on link state. LLDD
* may reference fields directly to change them. Initialized by the
@@ -259,10 +263,9 @@ struct nvme_fc_remote_port {
u32 port_role;
u64 node_name;
u64 port_name;
-
struct nvme_fc_local_port *localport;
-
void *private;
+ u32 dev_loss_tmo;
/* dynamic fields */
u32 port_id;
@@ -446,6 +449,10 @@ int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport);
+void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
+
+int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
+ u32 dev_loss_tmo);
/*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9310ce77d8e1..aea87f0d917b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -90,6 +90,14 @@ enum {
};
#define NVME_AQ_DEPTH 32
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+
+/*
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
+ * NVM-Express 1.2 specification, section 4.1.2.
+ */
+#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1)
enum {
NVME_REG_CAP = 0x0000, /* Controller Capabilities */
@@ -267,6 +275,7 @@ enum {
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
+ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
};
struct nvme_lbaf {
@@ -396,6 +405,21 @@ struct nvme_fw_slot_info_log {
};
enum {
+ NVME_CMD_EFFECTS_CSUPP = 1 << 0,
+ NVME_CMD_EFFECTS_LBCC = 1 << 1,
+ NVME_CMD_EFFECTS_NCC = 1 << 2,
+ NVME_CMD_EFFECTS_NIC = 1 << 3,
+ NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+};
+
+struct nvme_effects_log {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
NVME_SMART_CRIT_RELIABILITY = 1 << 2,
@@ -404,6 +428,10 @@ enum {
};
enum {
+ NVME_AER_ERROR = 0,
+ NVME_AER_SMART = 1,
+ NVME_AER_CSS = 6,
+ NVME_AER_VS = 7,
NVME_AER_NOTICE_NS_CHANGED = 0x0002,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
};
@@ -681,6 +709,7 @@ enum nvme_admin_opcode {
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
nvme_admin_security_recv = 0x82,
+ nvme_admin_sanitize_nvm = 0x84,
};
enum {
@@ -712,6 +741,7 @@ enum {
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
+ NVME_LOG_CMD_EFFECTS = 0x05,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index a1904aadbc45..0dcc60e820de 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -211,10 +211,14 @@ bool sbitmap_any_bit_set(const struct sbitmap *sb);
*/
bool sbitmap_any_bit_clear(const struct sbitmap *sb);
+#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
+#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))
+
typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *);
/**
- * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
+ * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
+ * @start: Where to start the iteration.
* @sb: Bitmap to iterate over.
* @fn: Callback. Should return true to continue or false to break early.
* @data: Pointer to pass to callback.
@@ -222,35 +226,61 @@ typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *);
* This is inline even though it's non-trivial so that the function calls to the
* callback will hopefully get optimized away.
*/
-static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn,
- void *data)
+static inline void __sbitmap_for_each_set(struct sbitmap *sb,
+ unsigned int start,
+ sb_for_each_fn fn, void *data)
{
- unsigned int i;
+ unsigned int index;
+ unsigned int nr;
+ unsigned int scanned = 0;
- for (i = 0; i < sb->map_nr; i++) {
- struct sbitmap_word *word = &sb->map[i];
- unsigned int off, nr;
+ if (start >= sb->depth)
+ start = 0;
+ index = SB_NR_TO_INDEX(sb, start);
+ nr = SB_NR_TO_BIT(sb, start);
- if (!word->word)
- continue;
+ while (scanned < sb->depth) {
+ struct sbitmap_word *word = &sb->map[index];
+ unsigned int depth = min_t(unsigned int, word->depth - nr,
+ sb->depth - scanned);
- nr = 0;
- off = i << sb->shift;
+ scanned += depth;
+ if (!word->word)
+ goto next;
+
+ /*
+ * On the first iteration of the outer loop, we need to add the
+ * bit offset back to the size of the word for find_next_bit().
+ * On all other iterations, nr is zero, so this is a noop.
+ */
+ depth += nr;
while (1) {
- nr = find_next_bit(&word->word, word->depth, nr);
- if (nr >= word->depth)
+ nr = find_next_bit(&word->word, depth, nr);
+ if (nr >= depth)
break;
-
- if (!fn(sb, off + nr, data))
+ if (!fn(sb, (index << sb->shift) + nr, data))
return;
nr++;
}
+next:
+ nr = 0;
+ if (++index >= sb->map_nr)
+ index = 0;
}
}
-#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
-#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))
+/**
+ * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
+ * @sb: Bitmap to iterate over.
+ * @fn: Callback. Should return true to continue or false to break early.
+ * @data: Pointer to pass to callback.
+ */
+static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn,
+ void *data)
+{
+ __sbitmap_for_each_set(sb, 0, fn, data);
+}
static inline unsigned long *__sbitmap_word(struct sbitmap *sb,
unsigned int bitnr)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e12d92808e98..f42d85631d17 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -43,28 +43,6 @@ enum writeback_sync_modes {
};
/*
- * why some writeback work was initiated
- */
-enum wb_reason {
- WB_REASON_BACKGROUND,
- WB_REASON_VMSCAN,
- WB_REASON_SYNC,
- WB_REASON_PERIODIC,
- WB_REASON_LAPTOP_TIMER,
- WB_REASON_FREE_MORE_MEM,
- WB_REASON_FS_FREE_SPACE,
- /*
- * There is no bdi forker thread any more and works are done
- * by emergency worker, however, this is TPs userland visible
- * and we'll be exposing exactly the same information,
- * so it has a mismatch name.
- */
- WB_REASON_FORKER_THREAD,
-
- WB_REASON_MAX,
-};
-
-/*
* A control structure which tells the writeback code what to do. These are
* always on the stack, and hence need no locking. They are always initialised
* in a manner such that unspecified fields are set to zero.
@@ -186,11 +164,11 @@ struct bdi_writeback;
void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
-bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
-bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
- enum wb_reason reason);
+void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason);
void sync_inodes_sb(struct super_block *);
-void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+void wakeup_flusher_threads(enum wb_reason reason);
+void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
+ enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
/* writeback.h requires fs.h; it, too, is not included from here. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 571ddb49b926..73af87dfbff8 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -221,6 +221,7 @@ struct scsi_device {
unsigned char access_state;
struct mutex state_mutex;
enum scsi_device_state sdev_state;
+ struct task_struct *quiesced_by;
unsigned long sdev_data[0];
} __attribute__((aligned(sizeof(unsigned long))));
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 2e1fa7910306..32db72c7c055 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -287,7 +287,6 @@ DEFINE_EVENT(writeback_class, name, \
TP_PROTO(struct bdi_writeback *wb), \
TP_ARGS(wb))
-DEFINE_WRITEBACK_EVENT(writeback_nowork);
DEFINE_WRITEBACK_EVENT(writeback_wake_background);
TRACE_EVENT(writeback_bdi_register,