summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-bio-prison-v2.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 10:31:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 10:31:20 -0700
commitd35a878ae1c50977b55e352fd46e36e35add72a0 (patch)
tree7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-bio-prison-v2.c
parente5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff)
parent390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff)
downloadlinux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.bz2
Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - A major update for DM cache that reduces the latency for deciding whether blocks should migrate to/from the cache. The bio-prison-v2 interface supports this improvement by enabling direct dispatch of work to workqueues rather than having to delay the actual work dispatch to the DM cache core. So the dm-cache policies are much more nimble by being able to drive IO as they see fit. One immediate benefit from the improved latency is a cache that should be much more adaptive to changing workloads. - Add a new DM integrity target that emulates a block device that has additional per-sector tags that can be used for storing integrity information. - Add a new authenticated encryption feature to the DM crypt target that builds on the capabilities provided by the DM integrity target. - Add MD interface for switching the raid4/5/6 journal mode and update the DM raid target to use it to enable aid4/5/6 journal write-back support. - Switch the DM verity target over to using the asynchronous hash crypto API (this helps work better with architectures that have access to off-CPU algorithm providers, which should reduce CPU utilization). - Various request-based DM and DM multipath fixes and improvements from Bart and Christoph. - A DM thinp target fix for a bio structure leak that occurs for each discard IFF discard passdown is enabled. - A fix for a possible deadlock in DM bufio and a fix to re-check the new buffer allocation watermark in the face of competing admin changes to the 'max_cache_size_bytes' tunable. - A couple DM core cleanups. * tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits) dm bufio: check new buffer allocation watermark every 30 seconds dm bufio: avoid a possible ABBA deadlock dm mpath: make it easier to detect unintended I/O request flushes dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit() dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH dm: introduce enum dm_queue_mode to cleanup related code dm mpath: verify __pg_init_all_paths locking assumptions at runtime dm: verify suspend_locking assumptions at runtime dm block manager: remove an unused argument from dm_block_manager_create() dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue() dm mpath: delay requeuing while path initialization is in progress dm mpath: avoid that path removal can trigger an infinite loop dm mpath: split and rename activate_path() to prepare for its expanded use dm ioctl: prevent stack leak in dm ioctl call dm integrity: use previously calculated log2 of sectors_per_block dm integrity: use hex2bin instead of open-coded variant dm crypt: replace custom implementation of hex2bin() dm crypt: remove obsolete references to per-CPU state dm verity: switch to using asynchronous hash crypto API dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues ...
Diffstat (limited to 'drivers/md/dm-bio-prison-v2.c')
-rw-r--r--drivers/md/dm-bio-prison-v2.c369
1 files changed, 369 insertions, 0 deletions
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
new file mode 100644
index 000000000000..c9b11f799cd8
--- /dev/null
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2012-2017 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-bio-prison-v2.h"
+
+#include <linux/spinlock.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+
+/*----------------------------------------------------------------*/
+
+#define MIN_CELLS 1024
+
+struct dm_bio_prison_v2 {
+ struct workqueue_struct *wq;
+
+ spinlock_t lock;
+ mempool_t *cell_pool;
+ struct rb_root cells;
+};
+
+static struct kmem_cache *_cell_cache;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * @nr_cells should be the number of cells you want in use _concurrently_.
+ * Don't confuse it with the number of distinct keys.
+ */
+struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
+{
+ struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+
+ if (!prison)
+ return NULL;
+
+ prison->wq = wq;
+ spin_lock_init(&prison->lock);
+
+ prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
+ if (!prison->cell_pool) {
+ kfree(prison);
+ return NULL;
+ }
+
+ prison->cells = RB_ROOT;
+
+ return prison;
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2);
+
+void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
+{
+ mempool_destroy(prison->cell_pool);
+ kfree(prison);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);
+
+struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
+{
+ return mempool_alloc(prison->cell_pool, gfp);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);
+
+void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell)
+{
+ mempool_free(cell, prison->cell_pool);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);
+
+static void __setup_new_cell(struct dm_cell_key_v2 *key,
+ struct dm_bio_prison_cell_v2 *cell)
+{
+ memset(cell, 0, sizeof(*cell));
+ memcpy(&cell->key, key, sizeof(cell->key));
+ bio_list_init(&cell->bios);
+}
+
+static int cmp_keys(struct dm_cell_key_v2 *lhs,
+ struct dm_cell_key_v2 *rhs)
+{
+ if (lhs->virtual < rhs->virtual)
+ return -1;
+
+ if (lhs->virtual > rhs->virtual)
+ return 1;
+
+ if (lhs->dev < rhs->dev)
+ return -1;
+
+ if (lhs->dev > rhs->dev)
+ return 1;
+
+ if (lhs->block_end <= rhs->block_begin)
+ return -1;
+
+ if (lhs->block_begin >= rhs->block_end)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Returns true if node found, otherwise it inserts a new one.
+ */
+static bool __find_or_insert(struct dm_bio_prison_v2 *prison,
+ struct dm_cell_key_v2 *key,
+ struct dm_bio_prison_cell_v2 *cell_prealloc,
+ struct dm_bio_prison_cell_v2 **result)
+{
+ int r;
+ struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
+
+ while (*new) {
+ struct dm_bio_prison_cell_v2 *cell =
+ container_of(*new, struct dm_bio_prison_cell_v2, node);
+
+ r = cmp_keys(key, &cell->key);
+
+ parent = *new;
+ if (r < 0)
+ new = &((*new)->rb_left);
+
+ else if (r > 0)
+ new = &((*new)->rb_right);
+
+ else {
+ *result = cell;
+ return true;
+ }
+ }
+
+ __setup_new_cell(key, cell_prealloc);
+ *result = cell_prealloc;
+ rb_link_node(&cell_prealloc->node, parent, new);
+ rb_insert_color(&cell_prealloc->node, &prison->cells);
+
+ return false;
+}
+
+static bool __get(struct dm_bio_prison_v2 *prison,
+ struct dm_cell_key_v2 *key,
+ unsigned lock_level,
+ struct bio *inmate,
+ struct dm_bio_prison_cell_v2 *cell_prealloc,
+ struct dm_bio_prison_cell_v2 **cell)
+{
+ if (__find_or_insert(prison, key, cell_prealloc, cell)) {
+ if ((*cell)->exclusive_lock) {
+ if (lock_level <= (*cell)->exclusive_level) {
+ bio_list_add(&(*cell)->bios, inmate);
+ return false;
+ }
+ }
+
+ (*cell)->shared_count++;
+
+ } else
+ (*cell)->shared_count = 1;
+
+ return true;
+}
+
+bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_cell_key_v2 *key,
+ unsigned lock_level,
+ struct bio *inmate,
+ struct dm_bio_prison_cell_v2 *cell_prealloc,
+ struct dm_bio_prison_cell_v2 **cell_result)
+{
+ int r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_get_v2);
+
+static bool __put(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell)
+{
+ BUG_ON(!cell->shared_count);
+ cell->shared_count--;
+
+ // FIXME: shared locks granted above the lock level could starve this
+ if (!cell->shared_count) {
+ if (cell->exclusive_lock){
+ if (cell->quiesce_continuation) {
+ queue_work(prison->wq, cell->quiesce_continuation);
+ cell->quiesce_continuation = NULL;
+ }
+ } else {
+ rb_erase(&cell->node, &prison->cells);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell)
+{
+ bool r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __put(prison, cell);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_put_v2);
+
+static int __lock(struct dm_bio_prison_v2 *prison,
+ struct dm_cell_key_v2 *key,
+ unsigned lock_level,
+ struct dm_bio_prison_cell_v2 *cell_prealloc,
+ struct dm_bio_prison_cell_v2 **cell_result)
+{
+ struct dm_bio_prison_cell_v2 *cell;
+
+ if (__find_or_insert(prison, key, cell_prealloc, &cell)) {
+ if (cell->exclusive_lock)
+ return -EBUSY;
+
+ cell->exclusive_lock = true;
+ cell->exclusive_level = lock_level;
+ *cell_result = cell;
+
+ // FIXME: we don't yet know what level these shared locks
+ // were taken at, so have to quiesce them all.
+ return cell->shared_count > 0;
+
+ } else {
+ cell = cell_prealloc;
+ cell->shared_count = 0;
+ cell->exclusive_lock = true;
+ cell->exclusive_level = lock_level;
+ *cell_result = cell;
+ }
+
+ return 0;
+}
+
+int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_cell_key_v2 *key,
+ unsigned lock_level,
+ struct dm_bio_prison_cell_v2 *cell_prealloc,
+ struct dm_bio_prison_cell_v2 **cell_result)
+{
+ int r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __lock(prison, key, lock_level, cell_prealloc, cell_result);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_lock_v2);
+
+static void __quiesce(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ struct work_struct *continuation)
+{
+ if (!cell->shared_count)
+ queue_work(prison->wq, continuation);
+ else
+ cell->quiesce_continuation = continuation;
+}
+
+void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ struct work_struct *continuation)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ __quiesce(prison, cell, continuation);
+ spin_unlock_irqrestore(&prison->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);
+
+static int __promote(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ unsigned new_lock_level)
+{
+ if (!cell->exclusive_lock)
+ return -EINVAL;
+
+ cell->exclusive_level = new_lock_level;
+ return cell->shared_count > 0;
+}
+
+int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ unsigned new_lock_level)
+{
+ int r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __promote(prison, cell, new_lock_level);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2);
+
+static bool __unlock(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ struct bio_list *bios)
+{
+ BUG_ON(!cell->exclusive_lock);
+
+ bio_list_merge(bios, &cell->bios);
+ bio_list_init(&cell->bios);
+
+ if (cell->shared_count) {
+ cell->exclusive_lock = 0;
+ return false;
+ }
+
+ rb_erase(&cell->node, &prison->cells);
+ return true;
+}
+
+bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
+ struct dm_bio_prison_cell_v2 *cell,
+ struct bio_list *bios)
+{
+ bool r;
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __unlock(prison, cell, bios);
+ spin_unlock_irqrestore(&prison->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_unlock_v2);
+
+/*----------------------------------------------------------------*/
+
+int __init dm_bio_prison_init_v2(void)
+{
+ _cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0);
+ if (!_cell_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void dm_bio_prison_exit_v2(void)
+{
+ kmem_cache_destroy(_cell_cache);
+ _cell_cache = NULL;
+}