From 431b17f9d5453533cba7d73e7e40428e4f90b35d Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 3 Jul 2017 10:00:10 +0200 Subject: block, bfq: don't change ioprio class for a bfq_queue on a service tree On each deactivation or re-scheduling (after being served) of a bfq_queue, BFQ invokes the function __bfq_entity_update_weight_prio(), to perform pending updates of ioprio, weight and ioprio class for the bfq_queue. BFQ also invokes this function on I/O-request dispatches, to raise or lower weights more quickly when needed, thereby improving latency. However, the entity representing the bfq_queue may be on the active (sub)tree of a service tree when this happens, and, although with a very low probability, the bfq_queue may happen to also have a pending change of its ioprio class. If both conditions hold when __bfq_entity_update_weight_prio() is invoked, then the entity moves to a sort of hybrid state: the new service tree for the entity, as returned by bfq_entity_service_tree(), differs from service tree on which the entity still is. The functions that handle activations and deactivations of entities do not cope with such a hybrid state (and would need to become more complex to cope). This commit addresses this issue by just making __bfq_entity_update_weight_prio() not perform also a possible pending change of ioprio class, when invoked on an I/O-request dispatch for a bfq_queue. Such a change is thus postponed to when __bfq_entity_update_weight_prio() is invoked on deactivation or re-scheduling of the bfq_queue. Reported-by: Marco Piazza Reported-by: Laurentiu Nicola Signed-off-by: Paolo Valente Tested-by: Marco Piazza Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 14 ++++++++++---- block/bfq-iosched.h | 3 ++- block/bfq-wf2q.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 46 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 12bbc6b8657d..60a6835265fc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) } } } - /* Update weight both if it must be raised and if it must be lowered */ + /* + * To improve latency (for this or other queues), immediately + * update weight both if it must be raised and if it must be + * lowered. Since, entity may be on some active tree here, and + * might have a pending change of its ioprio class, invoke + * next function with the last parameter unset (see the + * comments on the function). + */ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) - __bfq_entity_update_weight_prio( - bfq_entity_service_tree(entity), - entity); + __bfq_entity_update_weight_prio(bfq_entity_service_tree(entity), + entity, false); } /* diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 5c3bf9861492..8fd83b885774 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity); struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity); + struct bfq_entity *entity, + bool update_class_too); void bfq_bfqq_served(struct bfq_queue *bfqq, int served); void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned long time_ms); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 8726ede19eef..5ec05cd42b80 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity) return sched_data->service_tree + idx; } - +/* + * Update weight and priority of entity. If update_class_too is true, + * then update the ioprio_class of entity too. + * + * The reason why the update of ioprio_class is controlled through the + * last parameter is as follows. Changing the ioprio class of an + * entity implies changing the destination service trees for that + * entity. If such a change occurred when the entity is already on one + * of the service trees for its previous class, then the state of the + * entity would become more complex: none of the new possible service + * trees for the entity, according to bfq_entity_service_tree(), would + * match any of the possible service trees on which the entity + * is. Complex operations involving these trees, such as entity + * activations and deactivations, should take into account this + * additional complexity. To avoid this issue, this function is + * invoked with update_class_too unset in the points in the code where + * entity may happen to be on some tree. + */ struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity) + struct bfq_entity *entity, + bool update_class_too) { struct bfq_service_tree *new_st = old_st; @@ -739,9 +757,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, bfq_weight_to_ioprio(entity->orig_weight); } - if (bfqq) + if (bfqq && update_class_too) bfqq->ioprio_class = bfqq->new_ioprio_class; - entity->prio_changed = 0; + + /* + * Reset prio_changed only if the ioprio_class change + * is not pending any longer. + */ + if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class) + entity->prio_changed = 0; /* * NOTE: here we may be changing the weight too early, @@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - st = __bfq_entity_update_weight_prio(st, entity); + /* + * When this function is invoked, entity is not in any service + * tree, then it is safe to invoke next function with the last + * parameter set (see the comments on the function). + */ + st = __bfq_entity_update_weight_prio(st, entity, true); bfq_calc_finish(entity, entity->budget); /* -- cgit v1.2.3 From 32825c45ff8f4cce937ab85b030dc693ceb1aa0a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 3 Jul 2017 20:37:14 +0800 Subject: blk-mq-sched: fix performance regression of mq-deadline When mq-deadline is taken, IOPS of sequential read and seqential write is observed more than 20% drop on sata(scsi-mq) devices, compared with using 'none' scheduler. The reason is that the default nr_requests for scheduler is too big for small queuedepth devices, and latency is increased much. Since the principle of taking 256 requests for mq scheduler is based on 128 queue depth, this patch changes into double size of min(hw queue_depth, 128). Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 7f0dc48ffb40..4ab69435708c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -515,10 +515,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) } /* - * Default to 256, since we don't split into sync/async like the - * old code did. Additionally, this is a per-hw queue depth. + * Default to double of smaller one between hw queue_depth and 128, + * since we don't split into sync/async like the old code did. + * Additionally, this is a per-hw queue depth. */ - q->nr_requests = 2 * BLKDEV_MAX_RQ; + q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, + BLKDEV_MAX_RQ); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_sched_alloc_tags(q, hctx, i); -- cgit v1.2.3 From 376a78abf5cc721a86ed42a1a24044d35fb8d2a8 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:08 -0700 Subject: bio-integrity: bio_trim should truncate integrity vector accordingly Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 1cfcd0df3f30..5b4b32a2f8d0 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1900,6 +1900,10 @@ void bio_trim(struct bio *bio, int offset, int size) bio_advance(bio, offset << 9); bio->bi_iter.bi_size = size; + + if (bio_integrity(bio)) + bio_integrity_trim(bio, 0, size); + } EXPORT_SYMBOL_GPL(bio_trim); -- cgit v1.2.3 From 309a62fa3a9e78cb37a620913151cbb47d83b81d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:09 -0700 Subject: bio-integrity: bio_integrity_advance must update integrity seed SCSI drivers do care about bip_seed so we must update it accordingly. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index b8a3a65f7364..8c2253c59edb 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -425,6 +425,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); + bip->bip_iter.bi_sector += bytes_done >> 9; bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } EXPORT_SYMBOL(bio_integrity_advance); -- cgit v1.2.3 From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:10 -0700 Subject: bio-integrity: fix interface for bio_integrity_trim bio_integrity_trim inherent it's interface from bio_trim and accept offset and size, but this API is error prone because data offset must always be insync with bio's data offset. That is why we have integrity update hook in bio_advance() So only meaningful values are: offset == 0, sectors == bio_sectors(bio) Let's just remove them completely. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 11 ++--------- block/bio.c | 4 ++-- drivers/md/dm.c | 2 +- include/linux/bio.h | 5 ++--- 4 files changed, 7 insertions(+), 15 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 8c2253c59edb..3a0d71199fb0 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -433,22 +433,15 @@ EXPORT_SYMBOL(bio_integrity_advance); /** * bio_integrity_trim - Trim integrity vector * @bio: bio whose integrity vector to update - * @offset: offset to first data sector - * @sectors: number of data sectors * * Description: Used to trim the integrity vector in a cloned bio. - * The ivec will be advanced corresponding to 'offset' data sectors - * and the length will be truncated corresponding to 'len' data - * sectors. */ -void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) +void bio_integrity_trim(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - bio_integrity_advance(bio, offset << 9); - bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors); + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); } EXPORT_SYMBOL(bio_integrity_trim); diff --git a/block/bio.c b/block/bio.c index 5b4b32a2f8d0..a6b225324a61 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1868,7 +1868,7 @@ struct bio *bio_split(struct bio *bio, int sectors, split->bi_iter.bi_size = sectors << 9; if (bio_integrity(split)) - bio_integrity_trim(split, 0, sectors); + bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); @@ -1902,7 +1902,7 @@ void bio_trim(struct bio *bio, int offset, int size) bio->bi_iter.bi_size = size; if (bio_integrity(bio)) - bio_integrity_trim(bio, 0, size); + bio_integrity_trim(bio); } EXPORT_SYMBOL_GPL(bio_trim); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 402946035308..13e714ea7a42 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1153,7 +1153,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, clone->bi_iter.bi_size = to_bytes(len); if (unlikely(bio_integrity(bio) != NULL)) - bio_integrity_trim(clone, 0, len); + bio_integrity_trim(clone); return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 664a27da276d..1d74f5120369 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); -extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); @@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio, return; } -static inline void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) +static inline void bio_integrity_trim(struct bio *bio) { return; } -- cgit v1.2.3 From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:11 -0700 Subject: bio-integrity: fold bio_integrity_enabled to bio_integrity_prep Currently all integrity prep hooks are open-coded, and if prepare fails we ignore it's code and fail bio with EIO. Let's return real error to upper layer, so later caller may react accordingly. In fact no one want to use bio_integrity_prep() w/o bio_integrity_enabled, so it is reasonable to fold it in to one function. Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K. Petersen [hch: merged with the latest block tree, return bool from bio_integrity_prep] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/data-integrity.txt | 6 +-- block/bio-integrity.c | 88 +++++++++++++++------------------- block/blk-core.c | 5 +- block/blk-mq.c | 4 +- drivers/nvdimm/blk.c | 13 +---- drivers/nvdimm/btt.c | 13 +---- include/linux/bio.h | 12 ++--- 7 files changed, 50 insertions(+), 91 deletions(-) (limited to 'block') diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt index f56ec97f0d14..934c44ea0c57 100644 --- a/Documentation/block/data-integrity.txt +++ b/Documentation/block/data-integrity.txt @@ -192,7 +192,7 @@ will require extra work due to the application tag. supported by the block device. - int bio_integrity_prep(bio); + bool bio_integrity_prep(bio); To generate IMD for WRITE and to set up buffers for READ, the filesystem must call bio_integrity_prep(bio). @@ -201,9 +201,7 @@ will require extra work due to the application tag. sector must be set, and the bio should have all data pages added. It is up to the caller to ensure that the bio does not change while I/O is in progress. - - bio_integrity_prep() should only be called if - bio_integrity_enabled() returned 1. + Complete bio with error if prepare failed for some reson. 5.3 PASSING EXISTING INTEGRITY METADATA diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 3a0d71199fb0..44c4c52681c2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_integrity_add_page); -/** - * bio_integrity_enabled - Check whether integrity can be passed - * @bio: bio to check - * - * Description: Determines whether bio_integrity_prep() can be called - * on this bio or not. bio data direction and target device must be - * set prior to calling. The functions honors the write_generate and - * read_verify flags in sysfs. - */ -bool bio_integrity_enabled(struct bio *bio) -{ - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - - if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) - return false; - - if (!bio_sectors(bio)) - return false; - - /* Already protected? */ - if (bio_integrity(bio)) - return false; - - if (bi == NULL) - return false; - - if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL && - (bi->flags & BLK_INTEGRITY_VERIFY)) - return true; - - if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL && - (bi->flags & BLK_INTEGRITY_GENERATE)) - return true; - - return false; -} -EXPORT_SYMBOL(bio_integrity_enabled); - /** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device @@ -262,14 +224,15 @@ static blk_status_t bio_integrity_process(struct bio *bio, * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare * - * Description: Allocates a buffer for integrity metadata, maps the - * pages and attaches them to a bio. The bio must have data - * direction, target device and start sector set priot to calling. In - * the WRITE case, integrity metadata will be generated using the - * block device's integrity function. In the READ case, the buffer + * Description: Checks if the bio already has an integrity payload attached. + * If it does, the payload has been generated by another kernel subsystem, + * and we just pass it through. Otherwise allocates integrity payload. + * The bio must have data direction, target device and start sector set priot + * to calling. In the WRITE case, integrity metadata will be generated using + * the block device's integrity function. In the READ case, the buffer * will be prepared for DMA and a suitable end_io handler set up. */ -int bio_integrity_prep(struct bio *bio) +bool bio_integrity_prep(struct bio *bio) { struct bio_integrity_payload *bip; struct blk_integrity *bi; @@ -279,20 +242,41 @@ int bio_integrity_prep(struct bio *bio) unsigned int len, nr_pages; unsigned int bytes, offset, i; unsigned int intervals; + blk_status_t status; bi = bdev_get_integrity(bio->bi_bdev); q = bdev_get_queue(bio->bi_bdev); - BUG_ON(bi == NULL); - BUG_ON(bio_integrity(bio)); + if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) + return true; + + if (!bio_sectors(bio)) + return true; + /* Already protected? */ + if (bio_integrity(bio)) + return true; + + if (bi == NULL) + return true; + + if (bio_data_dir(bio) == READ) { + if (!bi->profile->verify_fn || + !(bi->flags & BLK_INTEGRITY_VERIFY)) + return true; + } else { + if (!bi->profile->generate_fn || + !(bi->flags & BLK_INTEGRITY_GENERATE)) + return true; + } intervals = bio_integrity_intervals(bi, bio_sectors(bio)); /* Allocate kernel buffer for protection data */ len = intervals * bi->tuple_size; buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); + status = BLK_STS_RESOURCE; if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); - return -ENOMEM; + goto err_end_io; } end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -304,7 +288,8 @@ int bio_integrity_prep(struct bio *bio) if (IS_ERR(bip)) { printk(KERN_ERR "could not allocate data integrity bioset\n"); kfree(buf); - return PTR_ERR(bip); + status = BLK_STS_RESOURCE; + goto err_end_io; } bip->bip_flags |= BIP_BLOCK_INTEGRITY; @@ -349,8 +334,13 @@ int bio_integrity_prep(struct bio *bio) /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE) bio_integrity_process(bio, bi->profile->generate_fn); + return true; + +err_end_io: + bio->bi_status = status; + bio_endio(bio); + return false; - return 0; } EXPORT_SYMBOL(bio_integrity_prep); diff --git a/block/blk-core.c b/block/blk-core.c index af393d5a9680..970b9c9638c5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; - } if (op_is_flush(bio->bi_opf)) { spin_lock_irq(q->queue_lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index ced2b000ca02..77617fb12661 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1550,10 +1550,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio_io_error(bio); + if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; - } if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index f12d23c49771..1a578b2a437b 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -179,16 +179,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) int err = 0, rw; bool do_acct; - /* - * bio_integrity_enabled also checks if the bio already has an - * integrity payload attached. If it does, we *don't* do a - * bio_integrity_prep here - the payload has been generated by - * another kernel subsystem, and we just pass it through. - */ - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_status = BLK_STS_IOERR; - goto out; - } + if (!bio_integrity_prep(bio)) + return BLK_QC_T_NONE; bip = bio_integrity(bio); nsblk = q->queuedata; @@ -212,7 +204,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) if (do_acct) nd_iostat_end(bio, start); - out: bio_endio(bio); return BLK_QC_T_NONE; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index b6ba0618ea46..b5caaee78bbf 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1203,16 +1203,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) int err = 0; bool do_acct; - /* - * bio_integrity_enabled also checks if the bio already has an - * integrity payload attached. If it does, we *don't* do a - * bio_integrity_prep here - the payload has been generated by - * another kernel subsystem, and we just pass it through. - */ - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_status = BLK_STS_IOERR; - goto out; - } + if (!bio_integrity_prep(bio)) + return BLK_QC_T_NONE; do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { @@ -1239,7 +1231,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) if (do_acct) nd_iostat_end(bio, start); -out: bio_endio(bio); return BLK_QC_T_NONE; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 1d74f5120369..b3b5f5a89a9c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -724,8 +724,7 @@ struct biovec_slab { extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern bool bio_integrity_enabled(struct bio *bio); -extern int bio_integrity_prep(struct bio *); +extern bool bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); @@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio) return NULL; } -static inline bool bio_integrity_enabled(struct bio *bio) -{ - return false; -} - static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) { return 0; @@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs) return; } -static inline int bio_integrity_prep(struct bio *bio) +static inline bool bio_integrity_prep(struct bio *bio) { - return 0; + return true; } static inline void bio_integrity_free(struct bio *bio) -- cgit v1.2.3 From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:12 -0700 Subject: t10-pi: Move opencoded contants to common header Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/t10-pi.c | 9 +++------ drivers/scsi/lpfc/lpfc_scsi.c | 5 +++-- drivers/scsi/qla2xxx/qla_isr.c | 8 ++++---- drivers/target/target_core_sbc.c | 2 +- include/linux/t10-pi.h | 2 ++ 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/t10-pi.c b/block/t10-pi.c index 3416dadf7b15..a98db384048f 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -28,9 +28,6 @@ typedef __be16 (csum_fn) (void *, unsigned int); -static const __be16 APP_ESCAPE = (__force __be16) 0xffff; -static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff; - static __be16 t10_pi_crc_fn(void *data, unsigned int len) { return cpu_to_be16(crc_t10dif(data, len)); @@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, switch (type) { case 1: case 2: - if (pi->app_tag == APP_ESCAPE) + if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; if (be32_to_cpu(pi->ref_tag) != @@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, } break; case 3: - if (pi->app_tag == APP_ESCAPE && - pi->ref_tag == REF_ESCAPE) + if (pi->app_tag == T10_PI_APP_ESCAPE && + pi->ref_tag == T10_PI_REF_ESCAPE) goto next; break; } diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 54fd0c81ceaf..99d2e990b231 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) * First check to see if a protection data * check is valid */ - if ((src->ref_tag == 0xffffffff) || - (src->app_tag == 0xffff)) { + if ((src->ref_tag == T10_PI_REF_ESCAPE) || + (src->app_tag == T10_PI_APP_ESCAPE)) { start_ref_tag++; goto skipit; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 2572121b765b..de031aed94f6 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1950,9 +1950,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) * For type 3: ref & app tag is all 'f's * For type 0,1,2: app tag is all 'f's */ - if ((a_app_tag == 0xffff) && + if ((a_app_tag == T10_PI_APP_ESCAPE) && ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) || - (a_ref_tag == 0xffffffff))) { + (a_ref_tag == T10_PI_REF_ESCAPE))) { uint32_t blocks_done, resid; sector_t lba_s = scsi_get_lba(cmd); @@ -1994,9 +1994,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) spt = page_address(sg_page(sg)) + sg->offset; spt += j; - spt->app_tag = 0xffff; + spt->app_tag = T10_PI_APP_ESCAPE; if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3) - spt->ref_tag = 0xffffffff; + spt->ref_tag = T10_PI_REF_ESCAPE; } return 0; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 4316f7b65fb7..dc9456e7dac9 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, (unsigned long long)sector, sdt->guard_tag, sdt->app_tag, be32_to_cpu(sdt->ref_tag)); - if (sdt->app_tag == cpu_to_be16(0xffff)) { + if (sdt->app_tag == T10_PI_APP_ESCAPE) { dsg_off += block_size; goto next; } diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 9375d23a24e7..635a3c5706bd 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -33,6 +33,8 @@ struct t10_pi_tuple { __be32 ref_tag; /* Target LBA or indirect LBA */ }; +#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff) +#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff) extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; -- cgit v1.2.3 From 63573e359d052e506d305c263576499f06355985 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:15 -0700 Subject: bio-integrity: Restore original iterator on verify stage Currently ->verify_fn not woks at all because at the moment it is called bio->bi_iter.bi_size == 0, so we do not iterate integrity bvecs at all. In order to perform verification we need to know original data vector, with new bvec rewind API this is trivial. testcase: https://github.com/dmonakhov/xfstests/commit/3c6509eaa83b9c17cd0bc95d73fcdd76e1c54a85 Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Dmitry Monakhov [hch: adopted for new status values] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 44c4c52681c2..8df4eb103ba9 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -184,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, /** * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for + * @proc_iter: iterator to process * @proc_fn: Pointer to the relevant processing function */ static blk_status_t bio_integrity_process(struct bio *bio, - integrity_processing_fn *proc_fn) + struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_iter iter; @@ -200,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio, iter.disk_name = bio->bi_bdev->bd_disk->disk_name; iter.interval = 1 << bi->interval_exp; - iter.seed = bip_get_seed(bip); + iter.seed = proc_iter->bi_sector; iter.prot_buf = prot_buf; - bio_for_each_segment(bv, bio, bviter) { + __bio_for_each_segment(bv, bio, bviter, *proc_iter) { void *kaddr = kmap_atomic(bv.bv_page); iter.data_buf = kaddr + bv.bv_offset; @@ -332,8 +333,10 @@ bool bio_integrity_prep(struct bio *bio) } /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) - bio_integrity_process(bio, bi->profile->generate_fn); + if (bio_data_dir(bio) == WRITE) { + bio_integrity_process(bio, &bio->bi_iter, + bi->profile->generate_fn); + } return true; err_end_io: @@ -358,8 +361,19 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct bvec_iter iter = bio->bi_iter; - bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn); + /* + * At the moment verify is called bio's iterator was advanced + * during split and completion, we need to rewind iterator to + * it's original position. + */ + if (bio_rewind_iter(bio, &iter, iter.bi_done)) { + bio->bi_status = bio_integrity_process(bio, &iter, + bi->profile->verify_fn); + } else { + bio->bi_status = BLK_STS_IOERR; + } /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; -- cgit v1.2.3 From 7c20f11680a441df09de7235206f70115fbf6290 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Jul 2017 16:58:43 -0600 Subject: bio-integrity: stop abusing bi_end_io And instead call directly into the integrity code from bio_end_io. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 39 ++++++++++++--------------------------- block/bio.c | 5 ++--- block/blk.h | 11 +++++++++++ include/linux/bio.h | 9 --------- 4 files changed, 25 insertions(+), 39 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 8df4eb103ba9..f733beab6ca2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc); * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +static void bio_integrity_free(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; @@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio) } bio->bi_integrity = NULL; + bio->bi_opf &= ~REQ_INTEGRITY; } -EXPORT_SYMBOL(bio_integrity_free); /** * bio_integrity_add_page - Attach integrity metadata @@ -326,12 +326,6 @@ bool bio_integrity_prep(struct bio *bio) offset = 0; } - /* Install custom I/O completion handler if read verify is enabled */ - if (bio_data_dir(bio) == READ) { - bip->bip_end_io = bio->bi_end_io; - bio->bi_end_io = bio_integrity_endio; - } - /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE) { bio_integrity_process(bio, &bio->bi_iter, @@ -375,13 +369,12 @@ static void bio_integrity_verify_fn(struct work_struct *work) bio->bi_status = BLK_STS_IOERR; } - /* Restore original bio completion handler */ - bio->bi_end_io = bip->bip_end_io; + bio_integrity_free(bio); bio_endio(bio); } /** - * bio_integrity_endio - Integrity I/O completion function + * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio * @error: Pointer to errno * @@ -392,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work) * in process context. This function postpones completion * accordingly. */ -void bio_integrity_endio(struct bio *bio) +bool __bio_integrity_endio(struct bio *bio) { - struct bio_integrity_payload *bip = bio_integrity(bio); + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) { + struct bio_integrity_payload *bip = bio_integrity(bio); - BUG_ON(bip->bip_bio != bio); - - /* In case of an I/O error there is no point in verifying the - * integrity metadata. Restore original bio end_io handler - * and run it. - */ - if (bio->bi_status) { - bio->bi_end_io = bip->bip_end_io; - bio_endio(bio); - - return; + INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); + queue_work(kintegrityd_wq, &bip->bip_work); + return false; } - INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); - queue_work(kintegrityd_wq, &bip->bip_work); + bio_integrity_free(bio); + return true; } -EXPORT_SYMBOL(bio_integrity_endio); /** * bio_integrity_advance - Advance integrity vector diff --git a/block/bio.c b/block/bio.c index a6b225324a61..9cabf5d0be20 100644 --- a/block/bio.c +++ b/block/bio.c @@ -243,9 +243,6 @@ fallback: void bio_uninit(struct bio *bio) { bio_disassociate_task(bio); - - if (bio_integrity(bio)) - bio_integrity_free(bio); } EXPORT_SYMBOL(bio_uninit); @@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio) again: if (!bio_remaining_done(bio)) return; + if (!bio_integrity_endio(bio)) + return; /* * Need to have a real endio function for chained bios, otherwise diff --git a/block/blk.h b/block/blk.h index 01ebb8185f6b..3a3d715bd725 100644 --- a/block/blk.h +++ b/block/blk.h @@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q) #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); +bool __bio_integrity_endio(struct bio *); +static inline bool bio_integrity_endio(struct bio *bio) +{ + if (bio_integrity(bio)) + return __bio_integrity_endio(bio); + return true; +} #else static inline void blk_flush_integrity(void) { } +static inline bool bio_integrity_endio(struct bio *bio) +{ + return true; +} #endif void blk_timeout_work(struct work_struct *work); diff --git a/include/linux/bio.h b/include/linux/bio.h index 1eba19580185..7b1cf4ba0902 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -320,8 +320,6 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ @@ -739,10 +737,8 @@ struct biovec_slab { bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); -extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); @@ -772,11 +768,6 @@ static inline bool bio_integrity_prep(struct bio *bio) return true; } -static inline void bio_integrity_free(struct bio *bio) -{ - return; -} - static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { -- cgit v1.2.3 From ea4d12dabf872b496218cdc8e7874feef8676cdd Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Wed, 5 Jul 2017 02:14:10 +0800 Subject: bio-integrity: fix boolreturn.cocci warnings block/bio-integrity.c:318:10-11: WARNING: return of 0/1 in function 'bio_integrity_prep' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Fixes: e23947bd76f0 ("bio-integrity: fold bio_integrity_enabled to bio_integrity_prep") CC: Dmitry Monakhov Signed-off-by: Fengguang Wu Signed-off-by: Jens Axboe --- block/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index f733beab6ca2..83e92beb3c9f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -316,7 +316,7 @@ bool bio_integrity_prep(struct bio *bio) bytes, offset); if (ret == 0) - return 0; + return false; if (ret < bytes) break; -- cgit v1.2.3 From 615d22a51c04856efe62af6e1d5b450aaf5cc2c0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 6 Jul 2017 20:21:15 +0900 Subject: block: Fix __blkdev_issue_zeroout loop The BIO issuing loop in __blkdev_issue_zeroout() is allocating BIOs with a maximum number of bvec (pages) equal to min(nr_sects, (sector_t)BIO_MAX_PAGES) This works since the requested number of bvecs will always be limited to the absolute maximum number supported (BIO_MAX_PAGES), but this is ineficient as too many bvec entries may be requested due to the different units being used in the min() operation (number of sectors vs number of pages). To fix this, introduce the helper __blkdev_sectors_to_bio_pages() to correctly calculate the number of bvecs for zeroout BIOs as the issuing loop progresses. The calculation is done using consistent units and makes sure that the number of pages return is at least 1 (for cases where the number of sectors is less that the number of sectors in a page). Also remove a trailing space after the bit shift in the internal loop min() call. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-lib.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index e8caecd71688..3fe0aec90597 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -261,6 +261,19 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, return 0; } +/* + * Convert a number of 512B sectors to a number of pages. + * The result is limited to a number of pages that can fit into a BIO. + * Also make sure that the result is always at least 1 (page) for the cases + * where nr_sects is lower than the number of sectors in a page. + */ +static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) +{ + sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1; + + return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES); +} + /** * __blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue @@ -307,18 +320,18 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, ret = 0; while (nr_sects != 0) { - bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES), - gfp_mask); + bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), + gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { - sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); - bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0); + sz = min((sector_t) PAGE_SIZE, nr_sects << 9); + bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0); nr_sects -= bi_size >> 9; sector += bi_size >> 9; - if (bi_size < (sz << 9)) + if (bi_size < sz) break; } cond_resched(); -- cgit v1.2.3 From b222dd2fdd53a40dd8f1d3082ae98e52883cce0d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 10 Jul 2017 11:40:17 -0700 Subject: block: call bio_uninit in bio_endio bio_free isn't a good place to free cgroup info. There are a lot of cases bio is allocated in special way (for example, in stack) and never gets called by bio_put hence bio_free, we are leaking memory. This patch moves the free to bio endio, which should be called anyway. The bio_uninit call in bio_free is kept, in case the bio never gets called bio endio. This assumes ->bi_end_io() doesn't access cgroup info, which seems true in my audit. This along with Christoph's integrity patch should fix the memory leak issue. Cc: Christoph Hellwig Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 9cabf5d0be20..9a63597aaacc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1833,6 +1833,8 @@ again: } blk_throtl_bio_endio(bio); + /* release cgroup info */ + bio_uninit(bio); if (bio->bi_end_io) bio->bi_end_io(bio); } -- cgit v1.2.3