diff options
author | Robert Becker <Rob.Becker@riverbed.com> | 2009-12-14 12:49:58 +1100 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-12-14 12:51:41 +1100 |
commit | 1e50915fe0bbf7a46db0fa7e1e604d3fc95f057d (patch) | |
tree | 7a722ad6f56c61a6173493f1cd44d809c8b1bd8d /drivers/md/md.c | |
parent | 67b8dc4b06b0e97df55fd76e209f34f9a52e820e (diff) | |
download | linux-1e50915fe0bbf7a46db0fa7e1e604d3fc95f057d.tar.bz2 |
raid: improve MD/raid10 handling of correctable read errors.
We've noticed severe lasting performance degradation of our raid
arrays when we have drives that yield large amounts of media errors.
The raid10 module will queue each failed read for retry, and also
will attempt call fix_read_error() to perform the read recovery.
Read recovery is performed while the array is frozen, so repeated
recovery attempts can degrade the performance of the array for
extended periods of time.
With this patch I propose adding a per md device max number of
corrected read attempts. Each rdev will maintain a count of
read correction attempts in the rdev->read_errors field (not
used currently for raid10). When we enter fix_read_error()
we'll check to see when the last read error occurred, and
divide the read error count by 2 for every hour since the
last read error. If at that point our read error count
exceeds the read error threshold, we'll fail the raid device.
In addition in this patch I add sysfs nodes (get/set) for
the per md max_read_errors attribute, the rdev->read_errors
attribute, and added some printk's to indicate when
fix_read_error fails to repair an rdev.
For testing I used debugfs->fail_make_request to inject
IO errors to the rdev while doing IO to the raid array.
Signed-off-by: Robert Becker <Rob.Becker@riverbed.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 859edbf8c9b0..f1b905a20133 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } /* + * Default number of read corrections we'll attempt on an rdev + * before ejecting it from the array. We divide the read error + * count by 2 for every hour elapsed between read errors. + */ +#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 +/* * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 1000 KB/sec, so the extra system load does not show up that much. * Increase it if you want to have more _guaranteed_ speed. Note that @@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi rdev->flags = 0; rdev->data_offset = 0; rdev->sb_events = 0; + rdev->last_read_error.tv_sec = 0; + rdev->last_read_error.tv_nsec = 0; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); atomic_set(&rdev->corrected_errors, 0); @@ -3290,6 +3298,29 @@ static struct md_sysfs_entry md_array_state = __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); static ssize_t +max_corrected_read_errors_show(mddev_t *mddev, char *page) { + return sprintf(page, "%d\n", + atomic_read(&mddev->max_corr_read_errors)); +} + +static ssize_t +max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len) +{ + char *e; + unsigned long n = simple_strtoul(buf, &e, 10); + + if (*buf && (*e == 0 || *e == '\n')) { + atomic_set(&mddev->max_corr_read_errors, n); + return len; + } + return -EINVAL; +} + +static struct md_sysfs_entry max_corr_read_errors = +__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show, + max_corrected_read_errors_store); + +static ssize_t null_show(mddev_t *mddev, char *page) { return -EINVAL; @@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = { &md_array_state.attr, &md_reshape_position.attr, &md_array_size.attr, + &max_corr_read_errors.attr, NULL, }; @@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev) mddev->ro = 0; atomic_set(&mddev->writes_pending,0); + atomic_set(&mddev->max_corr_read_errors, + MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); mddev->safemode = 0; mddev->safemode_timer.function = md_safemode_timeout; mddev->safemode_timer.data = (unsigned long) mddev; |