diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 10:43:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 10:43:59 -0800 |
commit | ce8a79d5601aab94c02ed4539c48e8605422ac94 (patch) | |
tree | 7830a97a475d57284640c8e2d3516521722708b6 /block/bfq-iosched.c | |
parent | 96f7e448b9f4546ffd0356ffceb2b9586777f316 (diff) | |
parent | f596da3efaf4130ff61cd029558845808df9bf99 (diff) | |
download | linux-ce8a79d5601aab94c02ed4539c48e8605422ac94.tar.bz2 |
Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Christoph:
- Support some passthrough commands without CAP_SYS_ADMIN (Kanchan
Joshi)
- Refactor PCIe probing and reset (Christoph Hellwig)
- Various fabrics authentication fixes and improvements (Sagi
Grimberg)
- Avoid fallback to sequential scan due to transient issues (Uday
Shankar)
- Implement support for the DEAC bit in Write Zeroes (Christoph
Hellwig)
- Allow overriding the IEEE OUI and firmware revision in configfs
for nvmet (Aleksandr Miloserdov)
- Force reconnect when the number of queues changes in nvmet (Daniel
Wagner)
- Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi
Grimberg, Christoph Hellwig, Christophe JAILLET)
- Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni)
- Use the common tagset helpers in nvme-pci driver (Christoph
Hellwig)
- Cleanup the nvme-pci removal path (Christoph Hellwig)
- Use kstrtobool() instead of strtobool (Christophe JAILLET)
- Allow unprivileged passthrough of Identify Controller (Joel
Granados)
- Support io stats on the mpath device (Sagi Grimberg)
- Minor nvmet cleanup (Sagi Grimberg)
- MD pull requests via Song:
- Code cleanups (Christoph)
- Various fixes
- Floppy pull request from Denis:
- Fix a memory leak in the init error path (Yuan)
- Series fixing some batch wakeup issues with sbitmap (Gabriel)
- Removal of the pktcdvd driver that was deprecated more than 5 years
ago, and subsequent removal of the devnode callback in struct
block_device_operations as no users are now left (Greg)
- Fix for partition read on an exclusively opened bdev (Jan)
- Series of elevator API cleanups (Jinlong, Christoph)
- Series of fixes and cleanups for blk-iocost (Kemeng)
- Series of fixes and cleanups for blk-throttle (Kemeng)
- Series adding concurrent support for sync queues in BFQ (Yu)
- Series bringing drbd a bit closer to the out-of-tree maintained
version (Christian, Joel, Lars, Philipp)
- Misc drbd fixes (Wang)
- blk-wbt fixes and tweaks for enable/disable (Yu)
- Fixes for mq-deadline for zoned devices (Damien)
- Add support for read-only and offline zones for null_blk
(Shin'ichiro)
- Series fixing the delayed holder tracking, as used by DM (Yu,
Christoph)
- Series enabling bio alloc caching for IRQ based IO (Pavel)
- Series enabling userspace peer-to-peer DMA (Logan)
- BFQ waker fixes (Khazhismel)
- Series fixing elevator refcount issues (Christoph, Jinlong)
- Series cleaning up references around queue destruction (Christoph)
- Series doing quiesce by tagset, enabling cleanups in drivers
(Christoph, Chao)
- Series untangling the queue kobject and queue references (Christoph)
- Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye,
Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph)
* tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits)
blktrace: Fix output non-blktrace event when blk_classic option enabled
block: sed-opal: Don't include <linux/kernel.h>
sed-opal: allow using IOC_OPAL_SAVE for locking too
blk-cgroup: Fix typo in comment
block: remove bio_set_op_attrs
nvmet: don't open-code NVME_NS_ATTR_RO enumeration
nvme-pci: use the tagset alloc/free helpers
nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set
nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers
nvme: consolidate setting the tagset flags
nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
block: bio_copy_data_iter
nvme-pci: split out a nvme_pci_ctrl_is_dead helper
nvme-pci: return early on ctrl state mismatch in nvme_reset_work
nvme-pci: rename nvme_disable_io_queues
nvme-pci: cleanup nvme_suspend_queue
nvme-pci: remove nvme_pci_disable
nvme-pci: remove nvme_disable_admin_queue
nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl
nvme: use nvme_wait_ready in nvme_shutdown_ctrl
...
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r-- | block/bfq-iosched.c | 102 |
1 file changed, 22 insertions, 80 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7ea427817f7f..a72304c728fc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -820,7 +820,7 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) * much easier to maintain the needed state: * 1) all active queues have the same weight, * 2) all active queues belong to the same I/O-priority class, - * 3) there are no active groups. + * 3) there is at most one active group. * In particular, the last condition is always true if hierarchical * support or the cgroups interface are not enabled, thus no state * needs to be maintained in this case. @@ -852,7 +852,7 @@ static bool bfq_asymmetric_scenario(struct bfq_data *bfqd, return varied_queue_weights || multiple_classes_busy #ifdef CONFIG_BFQ_GROUP_IOSCHED - || bfqd->num_groups_with_pending_reqs > 0 + || bfqd->num_groups_with_pending_reqs > 1 #endif ; } @@ -870,9 +870,9 @@ static bool bfq_asymmetric_scenario(struct bfq_data *bfqd, * In most scenarios, the rate at which nodes are created/destroyed * should be low too. */ -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root_cached *root) +void bfq_weights_tree_add(struct bfq_queue *bfqq) { + struct rb_root_cached *root = &bfqq->bfqd->queue_weights_tree; struct bfq_entity *entity = &bfqq->entity; struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; bool leftmost = true; @@ -944,13 +944,14 @@ inc_counter: * See the comments to the function bfq_weights_tree_add() for considerations * about overhead. 
*/ -void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct rb_root_cached *root) +void bfq_weights_tree_remove(struct bfq_queue *bfqq) { + struct rb_root_cached *root; + if (!bfqq->weight_counter) return; + root = &bfqq->bfqd->queue_weights_tree; bfqq->weight_counter->num_active--; if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; @@ -964,59 +965,6 @@ reset_entity_pointer: } /* - * Invoke __bfq_weights_tree_remove on bfqq and decrement the number - * of active groups for each queue's inactive parent entity. - */ -void bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = bfqq->entity.parent; - - for_each_entity(entity) { - struct bfq_sched_data *sd = entity->my_sched_data; - - if (sd->next_in_service || sd->in_service_entity) { - /* - * entity is still active, because either - * next_in_service or in_service_entity is not - * NULL (see the comments on the definition of - * next_in_service for details on why - * in_service_entity must be checked too). - * - * As a consequence, its parent entities are - * active as well, and thus this loop must - * stop here. - */ - break; - } - - /* - * The decrement of num_groups_with_pending_reqs is - * not performed immediately upon the deactivation of - * entity, but it is delayed to when it also happens - * that the first leaf descendant bfqq of entity gets - * all its pending requests completed. The following - * instructions perform this delayed decrement, if - * needed. See the comments on - * num_groups_with_pending_reqs for details. - */ - if (entity->in_groups_with_pending_reqs) { - entity->in_groups_with_pending_reqs = false; - bfqd->num_groups_with_pending_reqs--; - } - } - - /* - * Next function is invoked last, because it causes bfqq to be - * freed if the following holds: bfqq is not in service and - * has no dispatched request. DO NOT use bfqq after the next - * function invocation. 
- */ - __bfq_weights_tree_remove(bfqd, bfqq, - &bfqd->queue_weights_tree); -} - -/* * Return expired entry, or NULL to just start from scratch in rbtree. */ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, @@ -2135,7 +2083,9 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->last_completed_rq_bfqq || bfqd->last_completed_rq_bfqq == bfqq || bfq_bfqq_has_short_ttime(bfqq) || - now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC) + now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC || + bfqd->last_completed_rq_bfqq == &bfqd->oom_bfqq || + bfqq == &bfqd->oom_bfqq) return; /* @@ -2373,22 +2323,6 @@ static sector_t get_sdist(sector_t last_pos, struct request *rq) return 0; } -#if 0 /* Still not clear if we can do without next two functions */ -static void bfq_activate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - bfqd->rq_in_driver++; -} - -static void bfq_deactivate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - bfqd->rq_in_driver--; -} -#endif - static void bfq_remove_request(struct request_queue *q, struct request *rq) { @@ -6261,7 +6195,8 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) */ bfqq->budget_timeout = jiffies; - bfq_weights_tree_remove(bfqd, bfqq); + bfq_del_bfqq_in_groups_with_pending_reqs(bfqq); + bfq_weights_tree_remove(bfqq); } now_ns = ktime_get_ns(); @@ -6784,6 +6719,12 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, true, is_sync, NULL); + if (unlikely(bfqq == &bfqd->oom_bfqq)) + bfqq_already_existing = true; + } else + bfqq_already_existing = true; + + if (!bfqq_already_existing) { bfqq->waker_bfqq = old_bfqq->waker_bfqq; bfqq->tentative_waker_bfqq = NULL; @@ -6797,8 +6738,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) if (bfqq->waker_bfqq) 
hlist_add_head(&bfqq->woken_list_node, &bfqq->waker_bfqq->woken_list); - } else - bfqq_already_existing = true; + } } } @@ -7045,6 +6985,7 @@ static void bfq_exit_queue(struct elevator_queue *e) #endif blk_stat_disable_accounting(bfqd->queue); + clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags); wbt_enable_default(bfqd->queue); kfree(bfqd); @@ -7190,6 +7131,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) /* We dispatch from request queue wide instead of hw queue */ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags); wbt_disable_default(q); blk_stat_enable_accounting(q); |