From: Hannes Reinecke <hare@suse.com>
Date: Thu, 12 Jul 2012 08:36:52 +0200
Subject: md: display timeout errors in /proc/mdstat etc
Patch-mainline: Not yet, failfast is poorly defined
References: bnc#763402
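Track whether a device failed due to a timeout or some other reason.
If due to a timeout, set a flag ('Timeout', only meaningful together with
'Faulty') so that it can be reported: as "(T)" instead of "(F)" in
/proc/mdstat, as "timeout" in the per-device sysfs state, as
MD_DISK_TIMEOUT in the disk info state, and as the MD_DISK_ROLE_TIMEOUT
device role in version-1 superblocks.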
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
---
drivers/md/md.c | 44 +++++++++++++++++++++++++++++++++--------
drivers/md/md.h | 3 ++
drivers/md/raid10.c | 42 +++++++++++++++++++++++++++++++++------
drivers/md/raid10.h | 1
include/uapi/linux/raid/md_p.h | 2 +
5 files changed, 78 insertions(+), 14 deletions(-)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -724,8 +724,10 @@ static void super_written(struct bio *bi
if (bio->bi_error) {
pr_err("md: super_written gets error=%d\n", bio->bi_error);
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags)
- && (bio->bi_opf & MD_FAILFAST)) {
+ if (test_bit(Faulty, &rdev->flags)) {
+ if (bio->bi_error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
+ } else if (bio->bi_opf & MD_FAILFAST) {
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
set_bit(LastDev, &rdev->flags);
}
@@ -1072,6 +1074,7 @@ static int super_90_validate(struct mdde
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1568,6 +1571,7 @@ static int super_1_validate(struct mddev
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1685,6 +1689,9 @@ static int super_1_validate(struct mddev
break;
case MD_DISK_ROLE_JOURNAL: /* journal device */
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
+ /* probably legacy 'timed-out' device */
+ if (mddev->level == 10 || mddev->level == 1)
+ goto timeout;
/* journal device without journal feature */
pr_warn("md: journal device provided without journal feature, ignoring the device\n");
return -EINVAL;
@@ -1693,6 +1700,11 @@ static int super_1_validate(struct mddev
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
rdev->raid_disk = 0;
break;
+ case MD_DISK_ROLE_TIMEOUT: /* faulty, timeout */
+ timeout:
+ set_bit(Faulty, &rdev->flags);
+ set_bit(Timeout, &rdev->flags);
+ break;
default:
rdev->saved_raid_disk = role;
if ((le32_to_cpu(sb->feature_map) &
@@ -1867,9 +1879,12 @@ retry:
rdev_for_each(rdev2, mddev) {
i = rdev2->desc_nr;
- if (test_bit(Faulty, &rdev2->flags))
- sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
- else if (test_bit(In_sync, &rdev2->flags))
+ if (test_bit(Faulty, &rdev2->flags)) {
+ if (test_bit(Timeout, &rdev2->flags))
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_TIMEOUT);
+ else
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
+ } else if (test_bit(In_sync, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
else if (test_bit(Journal, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
@@ -2589,6 +2604,8 @@ state_show(struct md_rdev *rdev, char *p
(!test_bit(ExternalBbl, &flags) &&
rdev->badblocks.unacked_exist))
len += sprintf(page+len, "faulty%s", sep);
+ if (test_bit(Timeout, &flags))
+ len += sprintf(page+len, "timeout%s", sep);
if (test_bit(In_sync, &flags))
len += sprintf(page+len, "in_sync%s", sep);
if (test_bit(Journal, &flags))
@@ -2644,6 +2661,11 @@ state_store(struct md_rdev *rdev, const
err = 0;
else
err = -EBUSY;
+ } else if (cmd_match(buf, "timeout") && rdev->mddev->pers) {
+ md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags))
+ set_bit(Timeout, &rdev->flags);
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -2890,6 +2912,7 @@ slot_store(struct md_rdev *rdev, const c
rdev->raid_disk = slot;
/* assume it is working */
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -6008,9 +6031,11 @@ static int get_disk_info(struct mddev *m
info.minor = MINOR(rdev->bdev->bd_dev);
info.raid_disk = rdev->raid_disk;
info.state = 0;
- if (test_bit(Faulty, &rdev->flags))
+ if (test_bit(Faulty, &rdev->flags)) {
info.state |= (1<<MD_DISK_FAULTY);
- else if (test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Timeout, &rdev->flags))
+ info.state |= (1<<MD_DISK_TIMEOUT);
+ } else if (test_bit(In_sync, &rdev->flags)) {
info.state |= (1<<MD_DISK_ACTIVE);
info.state |= (1<<MD_DISK_SYNC);
}
@@ -7549,7 +7574,10 @@ static int md_seq_show(struct seq_file *
if (test_bit(Journal, &rdev->flags))
seq_printf(seq, "(J)");
if (test_bit(Faulty, &rdev->flags)) {
- seq_printf(seq, "(F)");
+ if (test_bit(Timeout, &rdev->flags))
+ seq_printf(seq, "(T)");
+ else
+ seq_printf(seq, "(F)");
continue;
}
if (rdev->raid_disk < 0)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -198,6 +198,9 @@ enum flag_bits {
* it didn't fail, so don't use FailFast
* any more for metadata
*/
+ Timeout, /* Device fault due to timeout.
+ * 'Faulty' is required to be set.
+ */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -375,6 +375,7 @@ static void raid10_end_read_request(stru
slot = r10_bio->read_slot;
dev = r10_bio->devs[slot].devnum;
rdev = r10_bio->devs[slot].rdev;
+ r10_bio->devs[slot].error = bio->bi_error;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -465,6 +466,7 @@ static void raid10_end_write_request(str
repl = 0;
rdev = conf->mirrors[dev].rdev;
}
+ r10_bio->devs[slot].error = bio->bi_error;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -491,6 +493,8 @@ static void raid10_end_write_request(str
*/
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
+ if (bio->bi_error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@@ -890,7 +894,10 @@ static void flush_pending_writes(struct
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_error = -ETIMEDOUT;
+ else
+ bio->bi_error = -EIO;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1075,7 +1082,10 @@ static void raid10_unplug(struct blk_plu
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_error = -ETIMEDOUT;
+ else
+ bio->bi_error = -EIO;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -2111,6 +2121,9 @@ static void sync_request_write(struct md
} else if (test_bit(FailFast, &rdev->flags)) {
/* Just give up on this device */
md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[i].error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
continue;
}
/* Ok, we need to write this bio, either to correct an
@@ -2371,6 +2384,7 @@ static void fix_read_error(struct r10con
struct md_rdev*rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
+ int read_error = r10_bio->devs[r10_bio->read_slot].error;
/* still own a reference to this rdev, so it cannot
* have been cleared recently.
@@ -2394,6 +2408,9 @@ static void fix_read_error(struct r10con
pr_notice("md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b);
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ read_error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
@@ -2643,9 +2660,12 @@ static void handle_read_error(struct mdd
freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
- } else
+ } else {
md_error(mddev, rdev);
-
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[slot].error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
rdev_dec_pending(rdev, mddev);
read_more:
@@ -2740,8 +2760,12 @@ static void handle_write_completed(struc
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL)
@@ -2756,8 +2780,12 @@ static void handle_write_completed(struc
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
}
put_buf(r10_bio);
@@ -4757,6 +4785,8 @@ static void end_reshape_write(struct bio
if (bio->bi_error) {
/* FIXME should record badblock */
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) && bio->bi_error == -ETIMEDOUT)
+ set_bit(Timeout, &rdev->flags);
}
rdev_dec_pending(rdev, mddev);
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -132,6 +132,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
+ int error;
} devs[0];
};
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -88,6 +88,7 @@
* devices available - and don't try to
* correct read errors.
*/
+#define MD_DISK_TIMEOUT 11 /* disk is faulty due to timeout */
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@@ -98,6 +99,7 @@
#define MD_DISK_ROLE_SPARE 0xffff
#define MD_DISK_ROLE_FAULTY 0xfffe
#define MD_DISK_ROLE_JOURNAL 0xfffd
+#define MD_DISK_ROLE_TIMEOUT 0xfff0 /* SUSE-only timed-out */
#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */
typedef struct mdp_device_descriptor_s {