From c7ac86de6a1bcb1b59c83e19b0d0d64a59604ade Mon Sep 17 00:00:00 2001
From: "Bryn M. Reeves" <breeves@redhat.com>
Date: Fri, 19 Oct 2007 22:29:32 +0100
Subject: dm mpath: rdac fix init race

Re-order the initialisation of dm-rdac to avoid registering the hw
handler before the workqueue has been initialised. Closes a race
that would potentially give an oops.

Signed-off-by: Bryn M. Reeves <breeves@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-mpath-rdac.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index 16b161345775..e04eb5c697fb 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -664,20 +664,21 @@ static struct hw_handler_type rdac_handler = {
 
 static int __init rdac_init(void)
 {
-	int r = dm_register_hw_handler(&rdac_handler);
-
-	if (r < 0) {
-		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
-		return r;
-	}
+	int r;
 
 	rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
 	if (!rdac_wkqd) {
 		DMERR("Failed to create workqueue rdac_wkqd.");
-		dm_unregister_hw_handler(&rdac_handler);
 		return -ENOMEM;
 	}
 
+	r = dm_register_hw_handler(&rdac_handler);
+	if (r < 0) {
+		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
+		destroy_workqueue(rdac_wkqd);
+		return r;
+	}
+
 	DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
 	return 0;
 }
-- 
cgit v1.2.3


From 027d50f92ea26fd065aeb141ebfcbbbe010825e3 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:38:36 +0100
Subject: dm io:ctl use constant struct size

Make size of dm_ioctl struct always 312 bytes on all supported
architectures.

This change retains compatibility with already-compiled code because
it uses an embedded offset to locate the payload that follows the
structure.

On 64-bit architectures there is no change at all; on 32-bit
we are increasing the size of dm-ioctl from 308 to 312 bytes.

Currently with 32-bit userspace / 64-bit kernel on x86_64
some ioctls (including rename, message) are incorrectly rejected
by the comparison against 'param + 1'.  This breaks userspace
lvrename and multipath 'fail_if_no_path' changes, for example.

(BTW Device-mapper uses its own versioning and ignores the ioctl
size bits.  Only the generic ioctl compat code on mixed arches
checks them, and that will continue to accept both sizes for now,
but we intend to list 308 as deprecated and eventually remove it.)

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Cc: Guido Guenther <agx@sigxcpu.org>
Cc: Kevin Corry <kevcorry@us.ibm.com>
Cc: stable@kernel.org
---
 drivers/md/dm-ioctl.c    | 6 +++---
 include/linux/dm-ioctl.h | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b441d82c338a..ba5d8f7cc5f2 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -700,7 +700,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 	int r;
 	char *new_name = (char *) param + param->data_start;
 
-	if (new_name < (char *) (param + 1) ||
+	if (new_name < (char *) param->data ||
 	    invalid_str(new_name, (void *) param + param_size)) {
 		DMWARN("Invalid new logical volume name supplied.");
 		return -EINVAL;
@@ -726,7 +726,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
 	if (!md)
 		return -ENXIO;
 
-	if (geostr < (char *) (param + 1) ||
+	if (geostr < (char *) param->data ||
 	    invalid_str(geostr, (void *) param + param_size)) {
 		DMWARN("Invalid geometry supplied.");
 		goto out;
@@ -1233,7 +1233,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	if (tmsg < (struct dm_target_msg *) (param + 1) ||
+	if (tmsg < (struct dm_target_msg *) param->data ||
 	    invalid_str(tmsg->message, (void *) param + param_size)) {
 		DMWARN("Invalid target message parameters.");
 		r = -EINVAL;
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index b93486107821..523281c5b7f5 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -131,6 +131,7 @@ struct dm_ioctl {
 	char name[DM_NAME_LEN];	/* device name */
 	char uuid[DM_UUID_LEN];	/* unique identifier for
 				 * the block device */
+	char data[7];		/* padding or data */
 };
 
 /*
@@ -285,9 +286,9 @@ typedef char ioctl_struct[308];
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	11
+#define DM_VERSION_MINOR	12
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2006-10-12)"
+#define DM_VERSION_EXTRA	"-ioctl (2007-10-02)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 55b42c5ae9c048de25233434afc7b71b01bee9e6 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 19 Oct 2007 22:38:37 +0100
Subject: dm crypt: drop device ref in ctr error path

Add a missing 'dm_put_device' in an error path in crypt target constructor.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 64fee90bb68b..ecd9ef0418cd 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -861,7 +861,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
 		if (!cc->iv_mode) {
 			ti->error = "Error kmallocing iv_mode string";
-			goto bad5;
+			goto bad_iv_mode;
 		}
 		strcpy(cc->iv_mode, ivmode);
 	} else
@@ -870,6 +870,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->private = cc;
 	return 0;
 
+bad_iv_mode:
+	dm_put_device(ti, cc->dev);
 bad5:
 	bioset_free(cc->bs);
 bad_bs:
-- 
cgit v1.2.3


From 815f9e32709b014a459919176a4f0feebd42731e Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 19 Oct 2007 22:38:38 +0100
Subject: dm crypt: missing kfree in ctr error path

Insert missing kfree() in crypt_iv_essiv_ctr() error path.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ecd9ef0418cd..3fa3b24947da 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -175,6 +175,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 
 	if (err) {
 		ti->error = "Error calculating hash in ESSIV";
+		kfree(salt);
 		return err;
 	}
 
-- 
cgit v1.2.3


From a72cf737e09da409e047863e38410930dae5fe05 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 19 Oct 2007 22:38:39 +0100
Subject: dm raid1: fix leakage

Add missing 'dm_io_client_destroy' to alloc_context error path.
Reorganize mirror constructor error path in order to prevent
workqueue leakage.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d09ff15490a5..6f60f307fae2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -974,6 +974,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 
 	if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
 		ti->error = "Error creating dirty region hash";
+		dm_io_client_destroy(ms->io_client);
 		kfree(ms);
 		return NULL;
 	}
@@ -1163,16 +1164,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
 	if (!ms->kmirrord_wq) {
 		DMERR("couldn't start kmirrord");
-		free_context(ms, ti, m);
-		return -ENOMEM;
+		r = -ENOMEM;
+		goto err_free_context;
 	}
 	INIT_WORK(&ms->kmirrord_work, do_mirror);
 
 	r = parse_features(ms, argc, argv, &args_used);
-	if (r) {
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	argv += args_used;
 	argc -= args_used;
@@ -1188,19 +1187,22 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (argc) {
 		ti->error = "Too many mirror arguments";
-		free_context(ms, ti, ms->nr_mirrors);
-		return -EINVAL;
+		r = -EINVAL;
+		goto err_destroy_wq;
 	}
 
 	r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
-	if (r) {
-		destroy_workqueue(ms->kmirrord_wq);
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	wake(ms);
 	return 0;
+
+err_destroy_wq:
+	destroy_workqueue(ms->kmirrord_wq);
+err_free_context:
+	free_context(ms, ti, ms->nr_mirrors);
+	return r;
 }
 
 static void mirror_dtr(struct dm_target *ti)
-- 
cgit v1.2.3


From 2e64a0f92830791f160274c46aef3678fabcf044 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 19 Oct 2007 22:38:41 +0100
Subject: dm delay: fix ctr error paths

Add missing 'dm_put_device' to dm-delay target constructor.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-delay.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 6928c136d3c5..0fd1f7c2e162 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -163,34 +163,32 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	if (argc == 3) {
-		dc->dev_write = NULL;
+	dc->dev_write = NULL;
+	if (argc == 3)
 		goto out;
-	}
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid write device sector";
-		goto bad;
+		goto bad_dev_read;
 	}
 	dc->start_write = tmpll;
 
 	if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
 		ti->error = "Invalid write delay";
-		goto bad;
+		goto bad_dev_read;
 	}
 
 	if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
 			  dm_table_get_mode(ti->table), &dc->dev_write)) {
 		ti->error = "Write device lookup failed";
-		dm_put_device(ti, dc->dev_read);
-		goto bad;
+		goto bad_dev_read;
 	}
 
 out:
 	dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
 	if (!dc->delayed_pool) {
 		DMERR("Couldn't create delayed bio pool.");
-		goto bad;
+		goto bad_dev_write;
 	}
 
 	setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
@@ -203,6 +201,11 @@ out:
 	ti->private = dc;
 	return 0;
 
+bad_dev_write:
+	if (dc->dev_write)
+		dm_put_device(ti, dc->dev_write);
+bad_dev_read:
+	dm_put_device(ti, dc->dev_read);
 bad:
 	kfree(dc);
 	return -EINVAL;
-- 
cgit v1.2.3


From 79662d1ea37392651f2cff08626cab6a40ba3adc Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:38:42 +0100
Subject: dm delay: fix status

Fix missing space in dm-delay target status output
if separate read and write delay are configured.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-delay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 0fd1f7c2e162..719f871bbd56 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -308,7 +308,7 @@ static int delay_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long) dc->start_read,
 		       dc->read_delay);
 		if (dc->dev_write)
-			DMEMIT("%s %llu %u", dc->dev_write->name,
+			DMEMIT(" %s %llu %u", dc->dev_write->name,
 			       (unsigned long long) dc->start_write,
 			       dc->write_delay);
 		break;
-- 
cgit v1.2.3


From ae9da83f6d800fe1f3b23bfbc8f7222ad1c5bb74 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Fri, 19 Oct 2007 22:38:43 +0100
Subject: dm: fix thaw_bdev

This patch fixes a bd_mount_sem counter corruption bug in device-mapper.

thaw_bdev() should be called only when freeze_bdev() was called for the
device.
Otherwise, thaw_bdev() will up bd_mount_sem and corrupt the semaphore counter.
struct block_device with the corrupted semaphore may remain in slab cache
and be reused later.

Attached patch will fix it by calling unlock_fs() instead.
unlock_fs() will determine whether it should call thaw_bdev()
by checking the device is frozen or not.

Easy reproducer is:
  #!/bin/sh
  while [ 1 ]; do
     dmsetup --notable create a
     dmsetup --nolockfs suspend a
     dmsetup remove a
  done

It's not easy to see the effect of corrupted semaphore.
So I have tested with putting printk below in bdev_alloc_inode():
        if (atomic_read(&ei->bdev.bd_mount_sem.count) != 1)
                printk(KERN_DEBUG "Incorrect semaphore count = %d (%p)\n",
                        atomic_read(&ei->bdev.bd_mount_sem.count),
                        &ei->bdev);

Without the patch, I saw something like:
 Incorrect semaphore count = 17 (f2ab91c0)

With the patch, the message didn't appear.

The bug was introduced in 2.6.16 with this bug fix:

commit d9dde59ba03095e526640988c0fedd75e93bc8b7
Date:   Fri Feb 24 13:04:24 2006 -0800

    [PATCH] dm: missing bdput/thaw_bdev at removal

    Need to unfreeze and release bdev otherwise the bdev inode with
    inconsistent state is reused later and cause problem.

and backported to 2.6.15.5.

It occurs only in free_dev(), which is called only when the dm device is
removed.  The buggy code is executed only if md->suspended_bdev is
non-NULL and that can happen only when the device was suspended without
noflush.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Cc: stable@kernel.org
---
 drivers/md/dm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d837d37f6209..7cb61ab887a2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1044,12 +1044,14 @@ static struct mapped_device *alloc_dev(int minor)
 	return NULL;
 }
 
+static void unlock_fs(struct mapped_device *md);
+
 static void free_dev(struct mapped_device *md)
 {
 	int minor = md->disk->first_minor;
 
 	if (md->suspended_bdev) {
-		thaw_bdev(md->suspended_bdev, NULL);
+		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
 	mempool_destroy(md->tio_pool);
-- 
cgit v1.2.3


From 6f3c3f0afa50782dc1742c968646c491657d255a Mon Sep 17 00:00:00 2001
From: vignesh babu <vignesh.babu@wipro.com>
Date: Fri, 19 Oct 2007 22:38:44 +0100
Subject: dm: use is_power_of_2

Replacing n & (n - 1) for power of 2 check by is_power_of_2(n)

Signed-off-by: vignesh babu <vignesh.babu@wipro.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c  | 3 ++-
 drivers/md/dm-snap.c   | 3 ++-
 drivers/md/dm-stripe.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6f60f307fae2..2b2ca371e20b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -19,6 +19,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "raid1"
 #define DM_IO_PAGES 64
@@ -995,7 +996,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 
 static inline int _check_region_size(struct dm_target *ti, uint32_t size)
 {
-	return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) ||
+	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
 		 size > ti->len);
 }
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 98a633f3d6b0..cee16fadd9ee 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/log2.h>
 
 #include "dm-snap.h"
 #include "dm-bio-list.h"
@@ -415,7 +416,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
 	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
 
 	/* Check chunk_size is a power of 2 */
-	if (chunk_size & (chunk_size - 1)) {
+	if (!is_power_of_2(chunk_size)) {
 		*error = "Chunk size is not a power of 2";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 51f5e0760012..969944a8aba2 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -11,6 +11,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "striped"
 
@@ -99,7 +100,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	/*
 	 * chunk_size is a power of two
 	 */
-	if (!chunk_size || (chunk_size & (chunk_size - 1)) ||
+	if (!is_power_of_2(chunk_size) ||
 	    (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
 		ti->error = "Invalid chunk size";
 		return -EINVAL;
-- 
cgit v1.2.3


From 094262db9e4c615e0db7a7b924d244b7a6c186b0 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 19 Oct 2007 22:38:51 +0100
Subject: dm: use kzalloc

Convert kmalloc() + memset() to kzalloc().

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-emc.c           | 6 ++----
 drivers/md/dm-hw-handler.c    | 6 ++----
 drivers/md/dm-path-selector.c | 6 ++----
 drivers/md/dm-table.c         | 3 +--
 drivers/md/dm-target.c        | 6 ++----
 5 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 342517261ece..ed28fe2072f0 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -211,12 +211,10 @@ fail_path:
 
 static struct emc_handler *alloc_emc_handler(void)
 {
-	struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
+	struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
 
-	if (h) {
-		memset(h, 0, sizeof(*h));
+	if (h)
 		spin_lock_init(&h->lock);
-	}
 
 	return h;
 }
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
index baafaaba4d4b..2ee84d8aa0bf 100644
--- a/drivers/md/dm-hw-handler.c
+++ b/drivers/md/dm-hw-handler.c
@@ -91,12 +91,10 @@ void dm_put_hw_handler(struct hw_handler_type *hwht)
 
 static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
 {
-	struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL);
+	struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
 
-	if (hwhi) {
-		memset(hwhi, 0, sizeof(*hwhi));
+	if (hwhi)
 		hwhi->hwht = *hwht;
-	}
 
 	return hwhi;
 }
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index f10a0c89b3f4..ca1bb636a3e4 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -94,12 +94,10 @@ out:
 
 static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst)
 {
-	struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+	struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL);
 
-	if (psi) {
-		memset(psi, 0, sizeof(*psi));
+	if (psi)
 		psi->pst = *pst;
-	}
 
 	return psi;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index fbe477bb2c68..8939e6105088 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -213,12 +213,11 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
 int dm_table_create(struct dm_table **result, int mode,
 		    unsigned num_targets, struct mapped_device *md)
 {
-	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
 
 	if (!t)
 		return -ENOMEM;
 
-	memset(t, 0, sizeof(*t));
 	INIT_LIST_HEAD(&t->devices);
 	atomic_set(&t->holders, 1);
 
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 477a041a41cf..835cf95b857f 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -88,12 +88,10 @@ void dm_put_target_type(struct target_type *t)
 
 static struct tt_internal *alloc_target(struct target_type *t)
 {
-	struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
 
-	if (ti) {
-		memset(ti, 0, sizeof(*ti));
+	if (ti)
 		ti->tt = *t;
-	}
 
 	return ti;
 }
-- 
cgit v1.2.3


From def5b5b26e6d97820465fadcb7d1cb73cc4daf33 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Fri, 19 Oct 2007 22:38:52 +0100
Subject: kcopyd use mutex instead of semaphore

Kcopyd uses a semaphore as mutex.  Use the mutex API instead of the (binary)
semaphore,

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/md/kcopyd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 7e052378c47e..f3831f31223e 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -198,7 +198,7 @@ struct kcopyd_job {
 	 * These fields are only used if the job has been split
 	 * into more manageable parts.
 	 */
-	struct semaphore lock;
+	struct mutex lock;
 	atomic_t sub_jobs;
 	sector_t progress;
 };
@@ -456,7 +456,7 @@ static void segment_complete(int read_err,
 	sector_t count = 0;
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 
-	down(&job->lock);
+	mutex_lock(&job->lock);
 
 	/* update the error */
 	if (read_err)
@@ -480,7 +480,7 @@ static void segment_complete(int read_err,
 			job->progress += count;
 		}
 	}
-	up(&job->lock);
+	mutex_unlock(&job->lock);
 
 	if (count) {
 		int i;
@@ -562,7 +562,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
 		dispatch_job(job);
 
 	else {
-		init_MUTEX(&job->lock);
+		mutex_init(&job->lock);
 		job->progress = 0;
 		split_job(job);
 	}
-- 
cgit v1.2.3


From 9e4e5f87ebcadb7ad9aca640bbe1038e1545e9f8 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:38:53 +0100
Subject: dm: tidy bio_io_error usage

Use bio_io_error() in only two places and tidy the code,
preparing for later patches.

There is no functional change in this patch.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7cb61ab887a2..41c9549b32ad 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -751,15 +751,13 @@ static void __clone_and_map(struct clone_info *ci)
 /*
  * Split the bio into several clones.
  */
-static void __split_bio(struct mapped_device *md, struct bio *bio)
+static int __split_bio(struct mapped_device *md, struct bio *bio)
 {
 	struct clone_info ci;
 
 	ci.map = dm_get_table(md);
-	if (!ci.map) {
-		bio_io_error(bio);
-		return;
-	}
+	if (unlikely(!ci.map))
+		return -EIO;
 
 	ci.md = md;
 	ci.bio = bio;
@@ -779,6 +777,8 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
 	/* drop the extra reference count */
 	dec_pending(ci.io, 0);
 	dm_table_put(ci.map);
+
+	return 0;
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -790,7 +790,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
  */
 static int dm_request(struct request_queue *q, struct bio *bio)
 {
-	int r;
+	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
 
@@ -815,18 +815,11 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
 		up_read(&md->io_lock);
 
-		if (bio_rw(bio) == READA) {
-			bio_io_error(bio);
-			return 0;
-		}
-
-		r = queue_io(md, bio);
-		if (r < 0) {
-			bio_io_error(bio);
-			return 0;
+		if (bio_rw(bio) != READA)
+			r = queue_io(md, bio);
 
-		} else if (r == 0)
-			return 0;	/* deferred successfully */
+		if (r <= 0)
+			goto out_req;
 
 		/*
 		 * We're in a while loop, because someone could suspend
@@ -835,8 +828,13 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 		down_read(&md->io_lock);
 	}
 
-	__split_bio(md, bio);
+	r = __split_bio(md, bio);
 	up_read(&md->io_lock);
+
+out_req:
+	if (r < 0)
+		bio_io_error(bio);
+
 	return 0;
 }
 
@@ -1235,7 +1233,8 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
 	while (c) {
 		n = c->bi_next;
 		c->bi_next = NULL;
-		__split_bio(md, c);
+		if (__split_bio(md, c))
+			bio_io_error(c);
 		c = n;
 	}
 }
-- 
cgit v1.2.3


From bb56acf840600421e68f49bb037d1c659fcb37f8 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Fri, 19 Oct 2007 22:38:54 +0100
Subject: dm io:ctl remove vmalloc void cast

In drivers/md/dm-ioctl.c::copy_params() there's a call to vmalloc()
where we currently cast the return value, but that's pretty pointless
given that vmalloc() returns "void *".

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index ba5d8f7cc5f2..6af95f9b7b52 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1358,7 +1358,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 	if (tmp.data_size < sizeof(tmp))
 		return -EINVAL;
 
-	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+	dmi = vmalloc(tmp.data_size);
 	if (!dmi)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 051814c69fac92f1964739c0cf08b5e3b1156b04 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Fri, 19 Oct 2007 22:38:55 +0100
Subject: dm: bio_list macro renaming

Remove BIO_LIST and DEFINE_BIO_LIST macros that gain us nothing
since contents are initialised to NULL.

Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-bio-list.h | 5 -----
 drivers/md/dm-delay.c    | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index 3f7b827649e3..d4509be0fe67 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -21,11 +21,6 @@ static inline int bio_list_empty(const struct bio_list *bl)
 	return bl->head == NULL;
 }
 
-#define BIO_LIST_INIT { .head = NULL, .tail = NULL }
-
-#define BIO_LIST(bl) \
-	struct bio_list bl = BIO_LIST_INIT
-
 static inline void bio_list_init(struct bio_list *bl)
 {
 	bl->head = bl->tail = NULL;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 719f871bbd56..bdd37f881c42 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -83,7 +83,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
 	struct dm_delay_info *delayed, *next;
 	unsigned long next_expires = 0;
 	int start_timer = 0;
-	BIO_LIST(flush_bios);
+	struct bio_list flush_bios = { };
 
 	mutex_lock(&delayed_bios_lock);
 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
-- 
cgit v1.2.3


From d336416ff1e6f7715f6dcb3b4e3e60626e406dd0 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Fri, 19 Oct 2007 22:38:56 +0100
Subject: dm mpath: emc fix an error message

Correct an error message, reported by Michael Wood <michael@frogfoot.com>.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-emc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index ed28fe2072f0..6b91b9ab1d41 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -81,7 +81,7 @@ static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
 	}
 
 	if (bio_add_page(bio, page, data_size, 0) != data_size) {
-		DMERR("get_failover_bio: alloc_page() failed.");
+		DMERR("get_failover_bio: bio_add_page() failed.");
 		__free_page(page);
 		bio_put(bio);
 		return NULL;
-- 
cgit v1.2.3


From 9934a8bea2fc67e6f07d74304eca2a91d251bfe8 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:38:57 +0100
Subject: dm crypt: use per device singlethread workqueues

Use a separate single-threaded workqueue for each crypt device
instead of one global workqueue.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3fa3b24947da..126ed21e6b17 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -80,6 +80,7 @@ struct crypt_config {
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
+	struct workqueue_struct *queue;
 	/*
 	 * crypto related data
 	 */
@@ -480,13 +481,14 @@ static void dec_pending(struct dm_crypt_io *io, int error)
  * Needed because it would be very unwise to do decryption in an
  * interrupt context.
  */
-static struct workqueue_struct *_kcryptd_workqueue;
 static void kcryptd_do_work(struct work_struct *work);
 
 static void kcryptd_queue_io(struct dm_crypt_io *io)
 {
+	struct crypt_config *cc = io->target->private;
+
 	INIT_WORK(&io->work, kcryptd_do_work);
-	queue_work(_kcryptd_workqueue, &io->work);
+	queue_work(cc->queue, &io->work);
 }
 
 static void crypt_endio(struct bio *clone, int error)
@@ -868,9 +870,17 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	} else
 		cc->iv_mode = NULL;
 
+	cc->queue = create_singlethread_workqueue("kcryptd");
+	if (!cc->queue) {
+		ti->error = "Couldn't create kcryptd queue";
+		goto bad_queue;
+	}
+
 	ti->private = cc;
 	return 0;
 
+bad_queue:
+	kfree(cc->iv_mode);
 bad_iv_mode:
 	dm_put_device(ti, cc->dev);
 bad5:
@@ -895,7 +905,7 @@ static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 
-	flush_workqueue(_kcryptd_workqueue);
+	destroy_workqueue(cc->queue);
 
 	bioset_free(cc->bs);
 	mempool_destroy(cc->page_pool);
@@ -1040,25 +1050,12 @@ static int __init dm_crypt_init(void)
 	if (!_crypt_io_pool)
 		return -ENOMEM;
 
-	_kcryptd_workqueue = create_workqueue("kcryptd");
-	if (!_kcryptd_workqueue) {
-		r = -ENOMEM;
-		DMERR("couldn't create kcryptd");
-		goto bad1;
-	}
-
 	r = dm_register_target(&crypt_target);
 	if (r < 0) {
 		DMERR("register failed %d", r);
-		goto bad2;
+		kmem_cache_destroy(_crypt_io_pool);
 	}
 
-	return 0;
-
-bad2:
-	destroy_workqueue(_kcryptd_workqueue);
-bad1:
-	kmem_cache_destroy(_crypt_io_pool);
 	return r;
 }
 
@@ -1069,7 +1066,6 @@ static void __exit dm_crypt_exit(void)
 	if (r < 0)
 		DMERR("unregister failed %d", r);
 
-	destroy_workqueue(_kcryptd_workqueue);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
-- 
cgit v1.2.3


From cabf08e4d3d1181d7c408edae97fb4d1c31518af Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:38:58 +0100
Subject: dm crypt: add post processing queue

Add post-processing queue (per crypt device) for read operations.

Current implementation uses only one queue for all operations
and this can lead to starvation caused by many requests waiting
for memory allocation. But the needed memory-releasing operation
is queued after these requests (in the same queue).

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 67 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 126ed21e6b17..357387fa10ca 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -36,7 +36,6 @@ struct dm_crypt_io {
 	struct work_struct work;
 	atomic_t pending;
 	int error;
-	int post_process;
 };
 
 /*
@@ -80,7 +79,8 @@ struct crypt_config {
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
-	struct workqueue_struct *queue;
+	struct workqueue_struct *io_queue;
+	struct workqueue_struct *crypt_queue;
 	/*
 	 * crypto related data
 	 */
@@ -476,19 +476,36 @@ static void dec_pending(struct dm_crypt_io *io, int error)
 }
 
 /*
- * kcryptd:
+ * kcryptd/kcryptd_io:
  *
  * Needed because it would be very unwise to do decryption in an
  * interrupt context.
+ *
+ * kcryptd performs the actual encryption or decryption.
+ *
+ * kcryptd_io performs the IO submission.
+ *
+ * They must be separated as otherwise the final stages could be
+ * starved by new requests which can block in the first stages due
+ * to memory allocation.
  */
 static void kcryptd_do_work(struct work_struct *work);
+static void kcryptd_do_crypt(struct work_struct *work);
 
 static void kcryptd_queue_io(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->target->private;
 
 	INIT_WORK(&io->work, kcryptd_do_work);
-	queue_work(cc->queue, &io->work);
+	queue_work(cc->io_queue, &io->work);
+}
+
+static void kcryptd_queue_crypt(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->target->private;
+
+	INIT_WORK(&io->work, kcryptd_do_crypt);
+	queue_work(cc->crypt_queue, &io->work);
 }
 
 static void crypt_endio(struct bio *clone, int error)
@@ -511,8 +528,7 @@ static void crypt_endio(struct bio *clone, int error)
 	}
 
 	bio_put(clone);
-	io->post_process = 1;
-	kcryptd_queue_io(io);
+	kcryptd_queue_crypt(io);
 	return;
 
 out:
@@ -634,10 +650,16 @@ static void kcryptd_do_work(struct work_struct *work)
 {
 	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
 
-	if (io->post_process)
-		process_read_endio(io);
-	else if (bio_data_dir(io->base_bio) == READ)
+	if (bio_data_dir(io->base_bio) == READ)
 		process_read(io);
+}
+
+static void kcryptd_do_crypt(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		process_read_endio(io);
 	else
 		process_write(io);
 }
@@ -870,16 +892,24 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	} else
 		cc->iv_mode = NULL;
 
-	cc->queue = create_singlethread_workqueue("kcryptd");
-	if (!cc->queue) {
+	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
+	if (!cc->io_queue) {
+		ti->error = "Couldn't create kcryptd io queue";
+		goto bad_io_queue;
+	}
+
+	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
+	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
-		goto bad_queue;
+		goto bad_crypt_queue;
 	}
 
 	ti->private = cc;
 	return 0;
 
-bad_queue:
+bad_crypt_queue:
+	destroy_workqueue(cc->io_queue);
+bad_io_queue:
 	kfree(cc->iv_mode);
 bad_iv_mode:
 	dm_put_device(ti, cc->dev);
@@ -905,7 +935,8 @@ static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 
-	destroy_workqueue(cc->queue);
+	destroy_workqueue(cc->io_queue);
+	destroy_workqueue(cc->crypt_queue);
 
 	bioset_free(cc->bs);
 	mempool_destroy(cc->page_pool);
@@ -931,9 +962,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 	io = mempool_alloc(cc->io_pool, GFP_NOIO);
 	io->target = ti;
 	io->base_bio = bio;
-	io->error = io->post_process = 0;
+	io->error = 0;
 	atomic_set(&io->pending, 0);
-	kcryptd_queue_io(io);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		kcryptd_queue_io(io);
+	else
+		kcryptd_queue_crypt(io);
 
 	return DM_MAPIO_SUBMITTED;
 }
-- 
cgit v1.2.3


From d469f84197a6415248ae4c5bd4f012cb609044fa Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:42:37 +0100
Subject: dm crypt: tidy whitespace

Clean up, convert some spaces to tabs.

No functional change here.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 357387fa10ca..00725d1863d5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -56,7 +56,7 @@ struct crypt_config;
 
 struct crypt_iv_operations {
 	int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
-	           const char *opts);
+		   const char *opts);
 	void (*dtr)(struct crypt_config *cc);
 	const char *(*status)(struct crypt_config *cc);
 	int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
@@ -138,7 +138,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 }
 
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
-	                      const char *opts)
+			      const char *opts)
 {
 	struct crypto_cipher *essiv_tfm;
 	struct crypto_hash *hash_tfm;
@@ -190,7 +190,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	if (crypto_cipher_blocksize(essiv_tfm) !=
 	    crypto_blkcipher_ivsize(cc->tfm)) {
 		ti->error = "Block size of ESSIV cipher does "
-			        "not match IV size of block cipher";
+			    "not match IV size of block cipher";
 		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
 		return -EINVAL;
@@ -321,10 +321,10 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
 	return r;
 }
 
-static void
-crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
-                   struct bio *bio_out, struct bio *bio_in,
-                   sector_t sector, int write)
+static void crypt_convert_init(struct crypt_config *cc,
+			       struct convert_context *ctx,
+			       struct bio *bio_out, struct bio *bio_in,
+			       sector_t sector, int write)
 {
 	ctx->bio_in = bio_in;
 	ctx->bio_out = bio_out;
@@ -340,7 +340,7 @@ crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static int crypt_convert(struct crypt_config *cc,
-                         struct convert_context *ctx)
+			 struct convert_context *ctx)
 {
 	int r = 0;
 
@@ -372,7 +372,7 @@ static int crypt_convert(struct crypt_config *cc,
 		}
 
 		r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
-		                              ctx->write, ctx->sector);
+					      ctx->write, ctx->sector);
 		if (r < 0)
 			break;
 
@@ -382,13 +382,13 @@ static int crypt_convert(struct crypt_config *cc,
 	return r;
 }
 
- static void dm_crypt_bio_destructor(struct bio *bio)
- {
+static void dm_crypt_bio_destructor(struct bio *bio)
+{
 	struct dm_crypt_io *io = bio->bi_private;
 	struct crypt_config *cc = io->target->private;
 
 	bio_free(bio, cc->bs);
- }
+}
 
 /*
  * Generate a new unfragmented bio with the given size
@@ -715,7 +715,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key)
 	cc->key_size = key_size; /* initial settings */
 
 	if ((!key_size && strcmp(key, "-")) ||
-	    (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
+	   (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
 		return -EINVAL;
 
 	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
@@ -785,8 +785,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad1;
 	}
 
-	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, 
-		     cipher) >= CRYPTO_MAX_ALG_NAME) {
+	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
 		ti->error = "Chain mode + cipher name is too long";
 		goto bad1;
 	}
@@ -829,7 +829,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
 		cc->iv_size = max(cc->iv_size,
-		                  (unsigned int)(sizeof(u64) / sizeof(u8)));
+				  (unsigned int)(sizeof(u64) / sizeof(u8)));
 	else {
 		if (cc->iv_gen_ops) {
 			DMWARN("Selected cipher does not support IVs");
@@ -875,7 +875,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	cc->start = tmpll;
 
 	if (dm_get_device(ti, argv[3], cc->start, ti->len,
-	                  dm_table_get_mode(ti->table), &cc->dev)) {
+			  dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
 		goto bad5;
 	}
-- 
cgit v1.2.3


From 636d5786c45414fd8e48f2a2325be072274fdba4 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 19 Oct 2007 22:47:52 +0100
Subject: dm crypt: tidy labels

Replace numbers with names in labels in error paths, to avoid confusion
when new one get added between existing ones.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 00725d1863d5..64ffa0ea8ca4 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -771,7 +771,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
  	if (crypt_set_key(cc, argv[1])) {
 		ti->error = "Error decoding key";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	/* Compatiblity mode for old dm-crypt cipher strings */
@@ -782,19 +782,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (strcmp(chainmode, "ecb") && !ivmode) {
 		ti->error = "This chaining mode requires an IV mechanism";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
 		     chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
 		ti->error = "Chain mode + cipher name is too long";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm)) {
 		ti->error = "Error allocating crypto tfm";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	strcpy(cc->cipher, cipher);
@@ -818,12 +818,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		cc->iv_gen_ops = &crypt_iv_null_ops;
 	else {
 		ti->error = "Invalid IV mode";
-		goto bad2;
+		goto bad_ivmode;
 	}
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
 	    cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
-		goto bad2;
+		goto bad_ivmode;
 
 	cc->iv_size = crypto_blkcipher_ivsize(tfm);
 	if (cc->iv_size)
@@ -842,13 +842,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
 	if (!cc->io_pool) {
 		ti->error = "Cannot allocate crypt io mempool";
-		goto bad3;
+		goto bad_slab_pool;
 	}
 
 	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
-		goto bad4;
+		goto bad_page_pool;
 	}
 
 	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
@@ -859,25 +859,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
 		ti->error = "Error setting key";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (sscanf(argv[2], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid iv_offset sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->iv_offset = tmpll;
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid device sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->start = tmpll;
 
 	if (dm_get_device(ti, argv[3], cc->start, ti->len,
 			  dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (ivmode && cc->iv_gen_ops) {
@@ -886,7 +886,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
 		if (!cc->iv_mode) {
 			ti->error = "Error kmallocing iv_mode string";
-			goto bad_iv_mode;
+			goto bad_ivmode_string;
 		}
 		strcpy(cc->iv_mode, ivmode);
 	} else
@@ -911,20 +911,20 @@ bad_crypt_queue:
 	destroy_workqueue(cc->io_queue);
 bad_io_queue:
 	kfree(cc->iv_mode);
-bad_iv_mode:
+bad_ivmode_string:
 	dm_put_device(ti, cc->dev);
-bad5:
+bad_device:
 	bioset_free(cc->bs);
 bad_bs:
 	mempool_destroy(cc->page_pool);
-bad4:
+bad_page_pool:
 	mempool_destroy(cc->io_pool);
-bad3:
+bad_slab_pool:
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
-bad2:
+bad_ivmode:
 	crypto_free_blkcipher(tfm);
-bad1:
+bad_cipher:
 	/* Must zero key material before freeing */
 	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
 	kfree(cc);
-- 
cgit v1.2.3


From c9e45581ad530cc1ca4b5d4add44a5b625234ada Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Fri, 19 Oct 2007 22:47:53 +0100
Subject: dm mpath: add retry pg init

This patch allows a failed path group initialisation command to be retried.

It adds a generic MP_RETRY flag and a "pg_init_retries" feature to
device-mapper multipath which limits the number of retries.

1. A hw handler sends a path initialization command to the storage and
the command completes with an error code indicating the command
should be retried.

2. The hardware handler calls dm_pg_init_complete() with MP_RETRY
set in err_flags to ask the dm multipath core to retry.

3. If the retry limit has not been exceeded, pg_init() is retried.
Otherwise fail_path() is called.

If you are using the userspace multipath-tools or device-mapper-multipath
package, you can set pg_init_retries in the 'device' section of your
/etc/multipath.conf file. For example:

features                "2 pg_init_retries 7"

The number of PG retries attempted is reported in the 'dmsetup status' output.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-hw-handler.h |  1 +
 drivers/md/dm-mpath.c      | 81 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
index e0832e6fcf36..46809dcb121a 100644
--- a/drivers/md/dm-hw-handler.h
+++ b/drivers/md/dm-hw-handler.h
@@ -58,5 +58,6 @@ unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
 #define MP_FAIL_PATH 1
 #define MP_BYPASS_PG 2
 #define MP_ERROR_IO  4	/* Don't retry this I/O */
+#define MP_RETRY 8
 
 #endif
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 31056abca89d..dd5ad6310f54 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -75,6 +75,8 @@ struct multipath {
 	unsigned queue_io;		/* Must we queue all I/O? */
 	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
 	unsigned saved_queue_if_no_path;/* Saved state during suspension */
+	unsigned pg_init_retries;	/* Number of times to retry pg_init */
+	unsigned pg_init_count;		/* Number of times pg_init called */
 
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
@@ -225,6 +227,8 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 		m->pg_init_required = 0;
 		m->queue_io = 0;
 	}
+
+	m->pg_init_count = 0;
 }
 
 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
@@ -424,6 +428,7 @@ static void process_queued_ios(struct work_struct *work)
 		must_queue = 0;
 
 	if (m->pg_init_required && !m->pg_init_in_progress) {
+		m->pg_init_count++;
 		m->pg_init_required = 0;
 		m->pg_init_in_progress = 1;
 		init_required = 1;
@@ -689,9 +694,11 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 	int r;
 	unsigned argc;
 	struct dm_target *ti = m->ti;
+	const char *param_name;
 
 	static struct param _params[] = {
-		{0, 1, "invalid number of feature args"},
+		{0, 3, "invalid number of feature args"},
+		{1, 50, "pg_init_retries must be between 1 and 50"},
 	};
 
 	r = read_param(_params, shift(as), &argc, &ti->error);
@@ -701,12 +708,28 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 	if (!argc)
 		return 0;
 
-	if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
-		return queue_if_no_path(m, 1, 0);
-	else {
+	do {
+		param_name = shift(as);
+		argc--;
+
+		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+			r = queue_if_no_path(m, 1, 0);
+			continue;
+		}
+
+		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+		    (argc >= 1)) {
+			r = read_param(_params + 1, shift(as),
+				       &m->pg_init_retries, &ti->error);
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
-		return -EINVAL;
-	}
+		r = -EINVAL;
+	} while (argc && !r);
+
+	return r;
 }
 
 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
@@ -975,6 +998,26 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
 	return 0;
 }
 
+/*
+ * Should we retry pg_init immediately?
+ */
+static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+{
+	unsigned long flags;
+	int limit_reached = 0;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	if (m->pg_init_count <= m->pg_init_retries)
+		m->pg_init_required = 1;
+	else
+		limit_reached = 1;
+
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return limit_reached;
+}
+
 /*
  * pg_init must call this when it has completed its initialisation
  */
@@ -985,8 +1028,14 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
 	struct multipath *m = pg->m;
 	unsigned long flags;
 
-	/* We insist on failing the path if the PG is already bypassed. */
-	if (err_flags && pg->bypassed)
+	/*
+	 * If requested, retry pg_init until maximum number of retries exceeded.
+	 * If retry not requested and PG already bypassed, always fail the path.
+	 */
+	if (err_flags & MP_RETRY) {
+		if (pg_init_limit_reached(m, pgpath))
+			err_flags |= MP_FAIL_PATH;
+	} else if (err_flags && pg->bypassed)
 		err_flags |= MP_FAIL_PATH;
 
 	if (err_flags & MP_FAIL_PATH)
@@ -996,7 +1045,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
 		bypass_pg(m, pg, 1);
 
 	spin_lock_irqsave(&m->lock, flags);
-	if (err_flags) {
+	if (err_flags & ~MP_RETRY) {
 		m->current_pgpath = NULL;
 		m->current_pg = NULL;
 	} else if (!m->pg_init_required)
@@ -1148,11 +1197,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 
 	/* Features */
 	if (type == STATUSTYPE_INFO)
-		DMEMIT("1 %u ", m->queue_size);
-	else if (m->queue_if_no_path)
-		DMEMIT("1 queue_if_no_path ");
-	else
-		DMEMIT("0 ");
+		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
+	else {
+		DMEMIT("%u ", m->queue_if_no_path +
+			      (m->pg_init_retries > 0) * 2);
+		if (m->queue_if_no_path)
+			DMEMIT("queue_if_no_path ");
+		if (m->pg_init_retries)
+			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
+	}
 
 	if (hwh->type && hwh->type->status)
 		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
-- 
cgit v1.2.3


From 16ebbf358477b762115fec2f85d9b9496a5cae76 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Fri, 19 Oct 2007 22:47:54 +0100
Subject: dm mpath: add hp handler

This patch adds the most basic dm-multipath hardware support for the
HP active/passive arrays.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/Kconfig          |   6 ++
 drivers/md/Makefile         |   2 +
 drivers/md/dm-mpath-hp-sw.c | 206 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 214 insertions(+)
 create mode 100644 drivers/md/dm-mpath-hp-sw.c

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 34a8c60a254a..a99af8978203 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -267,6 +267,12 @@ config DM_MULTIPATH_RDAC
 	---help---
 	  Multipath support for LSI/Engenio RDAC.
 
+config DM_MULTIPATH_HP
+        tristate "HP MSA multipath support (EXPERIMENTAL)"
+        depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL
+        ---help---
+          Multipath support for HP MSA (Active/Passive) series hardware.
+
 config DM_DELAY
 	tristate "I/O delaying target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index c49366cdc05d..fded842edeb1 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 dm-rdac-objs	:= dm-mpath-rdac.o
+dm-hp-sw-objs	:= dm-mpath-hp-sw.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
@@ -35,6 +36,7 @@ obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_EMC)	+= dm-emc.o
+obj-$(CONFIG_DM_MULTIPATH_HP)	+= dm-hp-sw.o
 obj-$(CONFIG_DM_MULTIPATH_RDAC)	+= dm-rdac.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
new file mode 100644
index 000000000000..575317037ce2
--- /dev/null
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2005 Mike Christie, All rights reserved.
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ * Authors: Mike Christie
+ *          Dave Wysochanski
+ *
+ * This file is released under the GPL.
+ *
+ * This module implements the specific path activation code for
+ * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
+ * storage arrays.
+ * These storage arrays have controller-based failover, not
+ * LUN-based failover.  However, LUN-based failover is the design
+ * of dm-multipath. Thus, this module is written for LUN-based failover.
+ */
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+
+#include "dm.h"
+#include "dm-hw-handler.h"
+
+#define DM_MSG_PREFIX "multipath hp-sw"
+#define DM_HP_HWH_NAME "hp-sw"
+#define DM_HP_HWH_VER "0.0.3"
+
+struct hp_sw_context {
+	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
+/*
+ * hp_sw_end_io - Completion handler for HP path activation.
+ * @req: path activation request
+ * @error: scsi-ml error
+ *
+ *  Check sense data, free request structure, and notify dm that
+ *  pg initialization has completed.
+ *
+ * Context: scsi-ml softirq
+ *
+ * Possible optimizations
+ * 1. Actually check sense data for retryable error (e.g. NOT_READY)
+ */
+static void hp_sw_end_io(struct request *req, int error)
+{
+	struct dm_path *path = req->end_io_data;
+	unsigned err_flags = 0;
+
+	if (!error)
+		DMDEBUG("%s path activation command - success",
+			path->dev->name);
+	else {
+		DMWARN("%s path activation command - error=0x%x",
+		       path->dev->name, error);
+		err_flags = MP_FAIL_PATH;
+	}
+
+	req->end_io_data = NULL;
+	__blk_put_request(req->q, req);
+	dm_pg_init_complete(path, err_flags);
+}
+
+/*
+ * hp_sw_get_request - Allocate an HP specific path activation request
+ * @path: path on which request will be sent (needed for request queue)
+ *
+ * The START command is used for path activation request.
+ * These arrays are controller-based failover, not LUN based.
+ * One START command issued to a single path will fail over all
+ * LUNs for the same controller.
+ *
+ * Possible optimizations
+ * 1. Make timeout configurable
+ * 2. Preallocate request
+ */
+static struct request *hp_sw_get_request(struct dm_path *path)
+{
+	struct request *req;
+	struct block_device *bdev = path->dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct hp_sw_context *h = path->hwhcontext;
+
+	req = blk_get_request(q, WRITE, GFP_NOIO);
+	if (!req)
+		goto out;
+
+	req->timeout = 60 * HZ;
+
+	req->errors = 0;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
+	req->end_io_data = path;
+	req->sense = h->sense;
+	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
+
+	memset(&req->cmd, 0, BLK_MAX_CDB);
+	req->cmd[0] = START_STOP;
+	req->cmd[4] = 1;
+	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+
+out:
+	return req;
+}
+
+/*
+ * hp_sw_pg_init - HP path activation implementation.
+ * @hwh: hardware handler specific data
+ * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
+ * @path: path to send initialization command
+ *
+ * Send an HP-specific path activation command on 'path'.
+ * Do not try to optimize in any way, just send the activation command.
+ * More than one path activation command may be sent to the same controller.
+ * This seems to work fine for basic failover support.
+ *
+ * Possible optimizations
+ * 1. Detect an in-progress activation request and avoid submitting another one
+ * 2. Model the controller and only send a single activation request at a time
+ * 3. Determine the state of a path before sending an activation request
+ *
+ * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
+ */
+static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
+			  struct dm_path *path)
+{
+	struct request *req;
+	struct hp_sw_context *h;
+
+	path->hwhcontext = hwh->context;
+	h = hwh->context;
+
+	req = hp_sw_get_request(path);
+	if (!req) {
+		DMERR("%s path activation command - allocation fail",
+		      path->dev->name);
+		goto fail;
+	}
+
+	DMDEBUG("%s path activation command - sent", path->dev->name);
+
+	blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
+	return;
+
+fail:
+	dm_pg_init_complete(path, MP_FAIL_PATH);
+}
+
+static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
+{
+	struct hp_sw_context *h;
+
+	h = kmalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	hwh->context = h;
+
+	return 0;
+}
+
+static void hp_sw_destroy(struct hw_handler *hwh)
+{
+	struct hp_sw_context *h = hwh->context;
+
+	kfree(h);
+}
+
+static struct hw_handler_type hp_sw_hwh = {
+	.name = DM_HP_HWH_NAME,
+	.module = THIS_MODULE,
+	.create = hp_sw_create,
+	.destroy = hp_sw_destroy,
+	.pg_init = hp_sw_pg_init,
+};
+
+static int __init hp_sw_init(void)
+{
+	int r;
+
+	r = dm_register_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("register failed %d", r);
+	else
+		DMINFO("version " DM_HP_HWH_VER " loaded");
+
+	return r;
+}
+
+static void __exit hp_sw_exit(void)
+{
+	int r;
+
+	r = dm_unregister_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("unregister failed %d", r);
+}
+
+module_init(hp_sw_init);
+module_exit(hp_sw_exit);
+
+MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
+MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DM_HP_HWH_VER);
-- 
cgit v1.2.3


From fe97e2aa0502922488ad62303a19a20c8044ae18 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Fri, 19 Oct 2007 22:47:55 +0100
Subject: dm mpath: hp retry if not ready

This patch adds retries to the hp hardware handler, and utilizes the
MP_RETRY flag of dm-multipath.  For now in the hp handler, if we get a
pg_init completed with a check condition we just assume we can retry the
pg_init command.  We make this assumption because of incomplete data on
specific check condition code of the HP hardware, and because testing
has shown the HP path initialization command to be idempotent.
The number of times we retry is settable via the "pg_init_retries"
multipath map feature.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-mpath-hp-sw.c | 64 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
index 575317037ce2..204bf42c9449 100644
--- a/drivers/md/dm-mpath-hp-sw.c
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -18,18 +18,53 @@
 #include <linux/types.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
 
 #include "dm.h"
 #include "dm-hw-handler.h"
 
 #define DM_MSG_PREFIX "multipath hp-sw"
 #define DM_HP_HWH_NAME "hp-sw"
-#define DM_HP_HWH_VER "0.0.3"
+#define DM_HP_HWH_VER "1.0.0"
 
 struct hp_sw_context {
 	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
 };
 
+/*
+ * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
+ * @req: path activation request
+ *
+ * Examine error codes of request and determine whether the error is retryable.
+ * Some error codes are already retried by scsi-ml (see
+ * scsi_decide_disposition), but some HP specific codes are not.
+ * The intent of this routine is to supply the logic for the HP specific
+ * check conditions.
+ *
+ * Returns:
+ *  1 - command completed with retryable error
+ *  0 - command completed with non-retryable error
+ *
+ * Possible optimizations
+ * 1. More hardware-specific error codes
+ */
+static int hp_sw_error_is_retryable(struct request *req)
+{
+	/*
+	 * NOT_READY is known to be retryable
+	 * For now we just dump out the sense data and call it retryable
+	 */
+	if (status_byte(req->errors) == CHECK_CONDITION)
+		__scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
+
+	/*
+	 * At this point we don't have complete information about all the error
+	 * codes from this hardware, so we are just conservative and retry
+	 * when in doubt.
+	 */
+	return 1;
+}
+
 /*
  * hp_sw_end_io - Completion handler for HP path activation.
  * @req: path activation request
@@ -40,23 +75,30 @@ struct hp_sw_context {
  *
  * Context: scsi-ml softirq
  *
- * Possible optimizations
- * 1. Actually check sense data for retryable error (e.g. NOT_READY)
  */
 static void hp_sw_end_io(struct request *req, int error)
 {
 	struct dm_path *path = req->end_io_data;
 	unsigned err_flags = 0;
 
-	if (!error)
+	if (!error) {
 		DMDEBUG("%s path activation command - success",
 			path->dev->name);
-	else {
-		DMWARN("%s path activation command - error=0x%x",
-		       path->dev->name, error);
-		err_flags = MP_FAIL_PATH;
+		goto out;
 	}
 
+	if (hp_sw_error_is_retryable(req)) {
+		DMDEBUG("%s path activation command - retry",
+			path->dev->name);
+		err_flags = MP_RETRY;
+		goto out;
+	}
+
+	DMWARN("%s path activation fail - error=0x%x",
+	       path->dev->name, error);
+	err_flags = MP_FAIL_PATH;
+
+out:
 	req->end_io_data = NULL;
 	__blk_put_request(req->q, req);
 	dm_pg_init_complete(path, err_flags);
@@ -135,7 +177,7 @@ static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
 	if (!req) {
 		DMERR("%s path activation command - allocation fail",
 		      path->dev->name);
-		goto fail;
+		goto retry;
 	}
 
 	DMDEBUG("%s path activation command - sent", path->dev->name);
@@ -143,8 +185,8 @@ static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
 	blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
 	return;
 
-fail:
-	dm_pg_init_complete(path, MP_FAIL_PATH);
+retry:
+	dm_pg_init_complete(path, MP_RETRY);
 }
 
 static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
-- 
cgit v1.2.3


From 6b3df0d7a5e85ad2afd3eecc50e2dee59e876ae8 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 19 Oct 2007 22:47:57 +0100
Subject: dm log: split suspend

There are now two phases to a suspend in device-mapper -
presuspend and postsuspend.  This patch removes the
single 'suspend' in the logging API and replaces it with
'presuspend' and 'postsuspend' functions to align it
better with core device-mapper.

A subsequent patch will make use of 'presuspend'.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log.c   | 2 +-
 drivers/md/dm-log.h   | 3 ++-
 drivers/md/dm-raid1.c | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a66428d860fe..072ee4353eab 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -696,7 +696,7 @@ static struct dirty_log_type _disk_type = {
 	.module = THIS_MODULE,
 	.ctr = disk_ctr,
 	.dtr = disk_dtr,
-	.suspend = disk_flush,
+	.postsuspend = disk_flush,
 	.resume = disk_resume,
 	.get_region_size = core_get_region_size,
 	.is_clean = core_is_clean,
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h
index 86a301c8daf1..3fae87eb5963 100644
--- a/drivers/md/dm-log.h
+++ b/drivers/md/dm-log.h
@@ -32,7 +32,8 @@ struct dirty_log_type {
 	 * There are times when we don't want the log to touch
 	 * the disk.
 	 */
-	int (*suspend)(struct dirty_log *log);
+	int (*presuspend)(struct dirty_log *log);
+	int (*postsuspend)(struct dirty_log *log);
 	int (*resume)(struct dirty_log *log);
 
 	/*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 2b2ca371e20b..8aafbb7ec574 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1305,7 +1305,7 @@ static void mirror_postsuspend(struct dm_target *ti)
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
-	if (log->type->suspend && log->type->suspend(log))
+	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
 		DMWARN("log suspend failed");
 }
-- 
cgit v1.2.3


From aa5617c55357d86c9082ba1d66fa9795370c9954 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Fri, 19 Oct 2007 22:47:58 +0100
Subject: dm raid1: add mirror_set to struct mirror

Store a pointer to the owning mirror_set structure within each mirror
structure for a subsequent patch to use.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 8aafbb7ec574..31123d4a6b9c 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -114,6 +114,7 @@ struct region {
  * Mirror set structures.
  *---------------------------------------------------------------*/
 struct mirror {
+	struct mirror_set *ms;
 	atomic_t error_count;
 	struct dm_dev *dev;
 	sector_t offset;
@@ -1017,6 +1018,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		return -ENXIO;
 	}
 
+	ms->mirror[mirror].ms = ms;
 	ms->mirror[mirror].offset = offset;
 
 	return 0;
-- 
cgit v1.2.3


From 96a1f7dba6e464155c0d1dc69c6c2efa96b644ac Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@linux.vnet.ibm.com>
Date: Fri, 19 Oct 2007 22:47:59 +0100
Subject: dm: export name and uuid

This patch adds a function to obtain a copy of a mapped device's name and uuid.

Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c         | 32 ++++++++++++++++++++++++++++++++
 include/linux/device-mapper.h |  1 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 6af95f9b7b52..138200bf5e0b 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1515,3 +1515,35 @@ void dm_interface_exit(void)
 
 	dm_hash_exit();
 }
+
+/**
+ * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
+ * @md: Pointer to mapped_device
+ * @name: Buffer (size DM_NAME_LEN) for name
+ * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined
+ */
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
+{
+	int r = 0;
+	struct hash_cell *hc;
+
+	if (!md)
+		return -ENXIO;
+
+	dm_get(md);
+	down_read(&_hash_lock);
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	strcpy(name, hc->name);
+	strcpy(uuid, hc->uuid ? : "");
+
+out:
+	up_read(&_hash_lock);
+	dm_put(md);
+
+	return r;
+}
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 499f5373e213..8b3c7cdc599c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -188,6 +188,7 @@ int dm_wait_event(struct mapped_device *md, int event_nr);
  * Info functions.
  */
 const char *dm_device_name(struct mapped_device *md);
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
-- 
cgit v1.2.3


From 51e5b2bd34ded40ef48cade8a6a8f1baa0b4275e Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@linux.vnet.ibm.com>
Date: Fri, 19 Oct 2007 22:48:00 +0100
Subject: dm: add uevent to core

This patch adds a uevent skeleton to device-mapper.

Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/Kconfig     |  6 +++++
 drivers/md/Makefile    |  4 +++
 drivers/md/dm-uevent.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm-uevent.h | 41 ++++++++++++++++++++++++++++
 drivers/md/dm.c        | 10 +++++++
 5 files changed, 133 insertions(+)
 create mode 100644 drivers/md/dm-uevent.c
 create mode 100644 drivers/md/dm-uevent.h

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index a99af8978203..9b6fbf044fd8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -282,4 +282,10 @@ config DM_DELAY
 
 	If unsure, say N.
 
+config DM_UEVENT
+	bool "DM uevents (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	Generate udev events for DM events.
+
 endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index fded842edeb1..d9aa7edb8780 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -50,6 +50,10 @@ ifeq ($(CONFIG_ALTIVEC),y)
 altivec_flags := -maltivec -mabi=altivec
 endif
 
+ifeq ($(CONFIG_DM_UEVENT),y)
+dm-mod-objs			+= dm-uevent.o
+endif
+
 targets += raid6int1.c
 $(obj)/raid6int1.c:   UNROLL := 1
 $(obj)/raid6int1.c:   $(src)/raid6int.uc $(src)/unroll.pl FORCE
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
new file mode 100644
index 000000000000..53200c96bcb4
--- /dev/null
+++ b/drivers/md/dm-uevent.c
@@ -0,0 +1,72 @@
+/*
+ * Device Mapper Uevent Support (dm-uevent)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/kobject.h>
+
+#include "dm.h"
+#include "dm-uevent.h"
+
+#define DM_MSG_PREFIX "uevent"
+
+static struct kmem_cache *_dm_event_cache;
+
+struct dm_uevent {
+	struct mapped_device *md;
+	enum kobject_action action;
+	struct kobj_uevent_env ku_env;
+	struct list_head elist;
+};
+
+static void dm_uevent_free(struct dm_uevent *event)
+{
+	kmem_cache_free(_dm_event_cache, event);
+}
+
+static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md)
+{
+	struct dm_uevent *event;
+
+	event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC);
+	if (!event)
+		return NULL;
+
+	INIT_LIST_HEAD(&event->elist);
+	event->md = md;
+
+	return event;
+}
+
+int dm_uevent_init(void)
+{
+	_dm_event_cache = KMEM_CACHE(dm_uevent, 0);
+	if (!_dm_event_cache)
+		return -ENOMEM;
+
+	DMINFO("version 1.0.3");
+
+	return 0;
+}
+
+void dm_uevent_exit(void)
+{
+	kmem_cache_destroy(_dm_event_cache);
+}
diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h
new file mode 100644
index 000000000000..9d776836489b
--- /dev/null
+++ b/drivers/md/dm-uevent.h
@@ -0,0 +1,41 @@
+/*
+ * Device Mapper Uevent Support
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#ifndef DM_UEVENT_H
+#define DM_UEVENT_H
+
+#ifdef CONFIG_DM_UEVENT
+
+extern int dm_uevent_init(void);
+extern void dm_uevent_exit(void);
+
+#else
+
+static inline int dm_uevent_init(void)
+{
+	return 0;
+}
+static inline void dm_uevent_exit(void)
+{
+}
+
+#endif	/* CONFIG_DM_UEVENT */
+
+#endif	/* DM_UEVENT_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 41c9549b32ad..bb5c1eaca52b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -7,6 +7,7 @@
 
 #include "dm.h"
 #include "dm-bio-list.h"
+#include "dm-uevent.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -143,11 +144,19 @@ static int __init local_init(void)
 		return -ENOMEM;
 	}
 
+	r = dm_uevent_init();
+	if (r) {
+		kmem_cache_destroy(_tio_cache);
+		kmem_cache_destroy(_io_cache);
+		return r;
+	}
+
 	_major = major;
 	r = register_blkdev(_major, _name);
 	if (r < 0) {
 		kmem_cache_destroy(_tio_cache);
 		kmem_cache_destroy(_io_cache);
+		dm_uevent_exit();
 		return r;
 	}
 
@@ -162,6 +171,7 @@ static void local_exit(void)
 	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
+	dm_uevent_exit();
 
 	_major = 0;
 
-- 
cgit v1.2.3


From 7a8c3d3b92883798e4ead21dd48c16db0ec0ff6f Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@linux.vnet.ibm.com>
Date: Fri, 19 Oct 2007 22:48:01 +0100
Subject: dm: uevent generate events

This patch adds support for the dm_path_event dm_send_event functions which
create and send udev events.

Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 Documentation/device-mapper/dm-uevent.txt |  97 +++++++++++++++++++
 drivers/md/dm-uevent.c                    | 150 ++++++++++++++++++++++++++++++
 drivers/md/dm-uevent.h                    |  18 ++++
 drivers/md/dm.c                           |  28 ++++++
 include/linux/device-mapper.h             |   2 +
 5 files changed, 295 insertions(+)
 create mode 100644 Documentation/device-mapper/dm-uevent.txt

diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
new file mode 100644
index 000000000000..07edbd85c714
--- /dev/null
+++ b/Documentation/device-mapper/dm-uevent.txt
@@ -0,0 +1,97 @@
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents).  Previously device-mapper events were only
+available through the ioctl interface.  The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events.  The first function
+listed creates the event and the second function sends the event(s).
+
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+                    const char *path, unsigned nr_valid_paths)
+
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Device-mapper specific action that caused the uevent action.
+	PATH_FAILED - A path has failed.
+	PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description: A sequence number for this specific device-mapper device.
+Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Major and minor number of the path device pertaining to this
+event.
+Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description:
+Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Name of the device-mapper device.
+Value: Name
+
+Variable Name: DM_UUID
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: UUID of the device-mapper device.
+Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below.
+
+1.) Path failure.
+UEVENT[1192521009.711215] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_FAILED
+DM_SEQNUM=1
+DM_PATH=8:32
+DM_NR_VALID_PATHS=0
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1130
+
+2.) Path reinstate.
+UEVENT[1192521132.989927] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_REINSTATED
+DM_SEQNUM=2
+DM_PATH=8:32
+DM_NR_VALID_PATHS=1
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1131
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
index 53200c96bcb4..50377e5dc2a3 100644
--- a/drivers/md/dm-uevent.c
+++ b/drivers/md/dm-uevent.c
@@ -21,12 +21,22 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/kobject.h>
+#include <linux/dm-ioctl.h>
 
 #include "dm.h"
 #include "dm-uevent.h"
 
 #define DM_MSG_PREFIX "uevent"
 
+static const struct {
+	enum dm_uevent_type type;
+	enum kobject_action action;
+	char *name;
+} _dm_uevent_type_names[] = {
+	{DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"},
+	{DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"},
+};
+
 static struct kmem_cache *_dm_event_cache;
 
 struct dm_uevent {
@@ -34,6 +44,8 @@ struct dm_uevent {
 	enum kobject_action action;
 	struct kobj_uevent_env ku_env;
 	struct list_head elist;
+	char name[DM_NAME_LEN];
+	char uuid[DM_UUID_LEN];
 };
 
 static void dm_uevent_free(struct dm_uevent *event)
@@ -55,6 +67,144 @@ static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md)
 	return event;
 }
 
+static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
+					      struct dm_target *ti,
+					      enum kobject_action action,
+					      const char *dm_action,
+					      const char *path,
+					      unsigned nr_valid_paths)
+{
+	struct dm_uevent *event;
+
+	event = dm_uevent_alloc(md);
+	if (!event) {
+		DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__);
+		goto err_nomem;
+	}
+
+	event->action = action;
+
+	if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) {
+		DMERR("%s: add_uevent_var() for DM_TARGET failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) {
+		DMERR("%s: add_uevent_var() for DM_ACTION failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u",
+			   dm_next_uevent_seq(md))) {
+		DMERR("%s: add_uevent_var() for DM_SEQNUM failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) {
+		DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d",
+			   nr_valid_paths)) {
+		DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	return event;
+
+err_add:
+	dm_uevent_free(event);
+err_nomem:
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * dm_send_uevents - send uevents for given list
+ *
+ * @events:	list of events to send
+ * @kobj:	kobject generating event
+ *
+ */
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+{
+	int r;
+	struct dm_uevent *event, *next;
+
+	list_for_each_entry_safe(event, next, events, elist) {
+		list_del_init(&event->elist);
+
+		/*
+		 * Need to call dm_copy_name_and_uuid from here for now.
+		 * Context of previous var adds and locking used for
+		 * hash_cell not compatable.
+		 */
+		if (dm_copy_name_and_uuid(event->md, event->name,
+					  event->uuid)) {
+			DMERR("%s: dm_copy_name_and_uuid() failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) {
+			DMERR("%s: add_uevent_var() for DM_NAME failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) {
+			DMERR("%s: add_uevent_var() for DM_UUID failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		r = kobject_uevent_env(kobj, event->action, event->ku_env.envp);
+		if (r)
+			DMERR("%s: kobject_uevent_env failed", __FUNCTION__);
+uevent_free:
+		dm_uevent_free(event);
+	}
+}
+EXPORT_SYMBOL_GPL(dm_send_uevents);
+
+/**
+ * dm_path_uevent - called to create a new path event and queue it
+ *
+ * @event_type:	path event type enum
+ * @ti:			pointer to a dm_target
+ * @path:		string containing pathname
+ * @nr_valid_paths:	number of valid paths remaining
+ *
+ */
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+		   const char *path, unsigned nr_valid_paths)
+{
+	struct mapped_device *md = dm_table_get_md(ti->table);
+	struct dm_uevent *event;
+
+	if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
+		DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type);
+		goto out;
+	}
+
+	event = dm_build_path_uevent(md, ti,
+				     _dm_uevent_type_names[event_type].action,
+				     _dm_uevent_type_names[event_type].name,
+				     path, nr_valid_paths);
+	if (IS_ERR(event))
+		goto out;
+
+	dm_uevent_add(md, &event->elist);
+
+out:
+	dm_put(md);
+}
+EXPORT_SYMBOL_GPL(dm_path_uevent);
+
 int dm_uevent_init(void)
 {
 	_dm_event_cache = KMEM_CACHE(dm_uevent, 0);
diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h
index 9d776836489b..2eccc8bd671a 100644
--- a/drivers/md/dm-uevent.h
+++ b/drivers/md/dm-uevent.h
@@ -21,10 +21,19 @@
 #ifndef DM_UEVENT_H
 #define DM_UEVENT_H
 
+enum dm_uevent_type {
+	DM_UEVENT_PATH_FAILED,
+	DM_UEVENT_PATH_REINSTATED,
+};
+
 #ifdef CONFIG_DM_UEVENT
 
 extern int dm_uevent_init(void);
 extern void dm_uevent_exit(void);
+extern void dm_send_uevents(struct list_head *events, struct kobject *kobj);
+extern void dm_path_uevent(enum dm_uevent_type event_type,
+			   struct dm_target *ti, const char *path,
+			   unsigned nr_valid_paths);
 
 #else
 
@@ -35,6 +44,15 @@ static inline int dm_uevent_init(void)
 static inline void dm_uevent_exit(void)
 {
 }
+static inline void dm_send_uevents(struct list_head *events,
+				   struct kobject *kobj)
+{
+}
+static inline void dm_path_uevent(enum dm_uevent_type event_type,
+				  struct dm_target *ti, const char *path,
+				  unsigned nr_valid_paths)
+{
+}
 
 #endif	/* CONFIG_DM_UEVENT */
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index bb5c1eaca52b..07cbbb8eb3e0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -113,6 +113,9 @@ struct mapped_device {
 	 */
 	atomic_t event_nr;
 	wait_queue_head_t eventq;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
 
 	/*
 	 * freeze/thaw support require holding onto a super block
@@ -985,6 +988,9 @@ static struct mapped_device *alloc_dev(int minor)
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
+	atomic_set(&md->uevent_seq, 0);
+	INIT_LIST_HEAD(&md->uevent_list);
+	spin_lock_init(&md->uevent_lock);
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
@@ -1083,8 +1089,16 @@ static void free_dev(struct mapped_device *md)
  */
 static void event_callback(void *context)
 {
+	unsigned long flags;
+	LIST_HEAD(uevents);
 	struct mapped_device *md = (struct mapped_device *) context;
 
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_splice_init(&md->uevent_list, &uevents);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+
+	dm_send_uevents(&uevents, &md->disk->kobj);
+
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
 }
@@ -1502,6 +1516,11 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
+uint32_t dm_next_uevent_seq(struct mapped_device *md)
+{
+	return atomic_add_return(1, &md->uevent_seq);
+}
+
 uint32_t dm_get_event_nr(struct mapped_device *md)
 {
 	return atomic_read(&md->event_nr);
@@ -1513,6 +1532,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr)
 			(event_nr != atomic_read(&md->event_nr)));
 }
 
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_add(elist, &md->uevent_list);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+}
+
 /*
  * The gendisk is only valid as long as you have a reference
  * count on 'md'.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8b3c7cdc599c..37c66d1254b5 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -183,6 +183,8 @@ int dm_resume(struct mapped_device *md);
  */
 uint32_t dm_get_event_nr(struct mapped_device *md);
 int dm_wait_event(struct mapped_device *md, int event_nr);
+uint32_t dm_next_uevent_seq(struct mapped_device *md);
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist);
 
 /*
  * Info functions.
-- 
cgit v1.2.3


From b15546f942c09fe55ad55af31841eecacbf4ffd1 Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@linux.vnet.ibm.com>
Date: Fri, 19 Oct 2007 22:48:02 +0100
Subject: dm mpath: send uevents

This patch adds calls to dm_path_event for a failed path and a reinstated
path.

Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-mpath.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index dd5ad6310f54..24b2b1e32fae 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -10,6 +10,7 @@
 #include "dm-hw-handler.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
+#include "dm-uevent.h"
 
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -857,6 +858,9 @@ static int fail_path(struct pgpath *pgpath)
 	if (pgpath == m->current_pgpath)
 		m->current_pgpath = NULL;
 
+	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
@@ -896,6 +900,9 @@ static int reinstate_path(struct pgpath *pgpath)
 	if (!m->nr_valid_paths++ && m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
+	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
-- 
cgit v1.2.3


From 80fd662683be5dc2a3b41b27e30942d5fd7b5d5c Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Sat, 20 Oct 2007 01:14:21 +0100
Subject: dm crypt: tidy pending

Add crypt prefix to dec_pending to avoid confusing it in backtraces with
the dm core function of the same name.

No functional change here.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 64ffa0ea8ca4..b41f945df8a1 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -460,7 +460,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
  */
-static void dec_pending(struct dm_crypt_io *io, int error)
+static void crypt_dec_pending(struct dm_crypt_io *io, int error)
 {
 	struct crypt_config *cc = (struct crypt_config *) io->target->private;
 
@@ -533,7 +533,7 @@ static void crypt_endio(struct bio *clone, int error)
 
 out:
 	bio_put(clone);
-	dec_pending(io, error);
+	crypt_dec_pending(io, error);
 }
 
 static void clone_init(struct dm_crypt_io *io, struct bio *clone)
@@ -563,7 +563,7 @@ static void process_read(struct dm_crypt_io *io)
 	 */
 	clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
 	if (unlikely(!clone)) {
-		dec_pending(io, -ENOMEM);
+		crypt_dec_pending(io, -ENOMEM);
 		return;
 	}
 
@@ -598,7 +598,7 @@ static void process_write(struct dm_crypt_io *io)
 	while (remaining) {
 		clone = crypt_alloc_buffer(io, remaining);
 		if (unlikely(!clone)) {
-			dec_pending(io, -ENOMEM);
+			crypt_dec_pending(io, -ENOMEM);
 			return;
 		}
 
@@ -608,7 +608,7 @@ static void process_write(struct dm_crypt_io *io)
 		if (unlikely(crypt_convert(cc, &ctx) < 0)) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
-			dec_pending(io, -EIO);
+			crypt_dec_pending(io, -EIO);
 			return;
 		}
 
@@ -643,7 +643,7 @@ static void process_read_endio(struct dm_crypt_io *io)
 	crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio,
 			   io->base_bio->bi_sector - io->target->begin, 0);
 
-	dec_pending(io, crypt_convert(cc, &ctx));
+	crypt_dec_pending(io, crypt_convert(cc, &ctx));
 }
 
 static void kcryptd_do_work(struct work_struct *work)
-- 
cgit v1.2.3