From: Hannes Reinecke <hare@suse.com>
Date: Thu, 12 Jul 2012 08:36:52 +0200
Subject: md: display timeout errors in /etc/mdstat etc
Patch-mainline: Not yet, failfast is poorly defined
References: bnc#763402
Track whether a device failed due to a timeout or some other reason.
If due to a timeout, set a flag so that it can be reported.
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
---
drivers/md/md.c | 44 +++++++++++++++++++++++++++++++-------
drivers/md/md.h | 3 ++
drivers/md/raid10.c | 47 +++++++++++++++++++++++++++++++++++------
drivers/md/raid10.h | 1
include/uapi/linux/raid/md_p.h | 2 +
5 files changed, 83 insertions(+), 14 deletions(-)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -950,8 +950,10 @@ static void super_written(struct bio *bi
pr_err("md: %s gets error=%d\n", __func__,
blk_status_to_errno(bio->bi_status));
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags)
- && (bio->bi_opf & MD_FAILFAST)) {
+ if (!test_bit(Faulty, &rdev->flags)) {
+ if (bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ } else if (bio->bi_opf & MD_FAILFAST) {
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
set_bit(LastDev, &rdev->flags);
}
@@ -1315,6 +1317,7 @@ static int super_90_validate(struct mdde
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1829,6 +1832,7 @@ static int super_1_validate(struct mddev
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1955,6 +1959,9 @@ static int super_1_validate(struct mddev
break;
case MD_DISK_ROLE_JOURNAL: /* journal device */
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
+ /* probably legacy 'timed-out' device */
+ if (mddev->level == 10 || mddev->level == 1)
+ goto timeout;
/* journal device without journal feature */
pr_warn("md: journal device provided without journal feature, ignoring the device\n");
return -EINVAL;
@@ -1963,6 +1970,11 @@ static int super_1_validate(struct mddev
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
rdev->raid_disk = 0;
break;
+ case MD_DISK_ROLE_TIMEOUT: /* faulty, timeout */
+ timeout:
+ set_bit(Faulty, &rdev->flags);
+ set_bit(Timeout, &rdev->flags);
+ break;
default:
rdev->saved_raid_disk = role;
if ((le32_to_cpu(sb->feature_map) &
@@ -2148,9 +2160,12 @@ retry:
rdev_for_each(rdev2, mddev) {
i = rdev2->desc_nr;
- if (test_bit(Faulty, &rdev2->flags))
- sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
- else if (test_bit(In_sync, &rdev2->flags))
+ if (test_bit(Faulty, &rdev2->flags)) {
+ if (test_bit(Timeout, &rdev2->flags))
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_TIMEOUT);
+ else
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
+ } else if (test_bit(In_sync, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
else if (test_bit(Journal, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
@@ -2942,6 +2957,8 @@ state_show(struct md_rdev *rdev, char *p
(!test_bit(ExternalBbl, &flags) &&
rdev->badblocks.unacked_exist))
len += sprintf(page+len, "faulty%s", sep);
+ if (test_bit(Timeout, &flags))
+ len += sprintf(page+len, "timeout%s", sep);
if (test_bit(In_sync, &flags))
len += sprintf(page+len, "in_sync%s", sep);
if (test_bit(Journal, &flags))
@@ -2997,6 +3014,11 @@ state_store(struct md_rdev *rdev, const
err = 0;
else
err = -EBUSY;
+ } else if (cmd_match(buf, "timeout") && rdev->mddev->pers) {
+ md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags))
+ set_bit(Timeout, &rdev->flags);
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -3247,6 +3269,7 @@ slot_store(struct md_rdev *rdev, const c
rdev->raid_disk = slot;
/* assume it is working */
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -6685,9 +6708,11 @@ static int get_disk_info(struct mddev *m
info.minor = MINOR(rdev->bdev->bd_dev);
info.raid_disk = rdev->raid_disk;
info.state = 0;
- if (test_bit(Faulty, &rdev->flags))
+ if (test_bit(Faulty, &rdev->flags)) {
info.state |= (1<<MD_DISK_FAULTY);
- else if (test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Timeout, &rdev->flags))
+ info.state |= (1<<MD_DISK_TIMEOUT);
+ } else if (test_bit(In_sync, &rdev->flags)) {
info.state |= (1<<MD_DISK_ACTIVE);
info.state |= (1<<MD_DISK_SYNC);
}
@@ -8261,7 +8286,10 @@ static int md_seq_show(struct seq_file *
if (test_bit(Journal, &rdev->flags))
seq_printf(seq, "(J)");
if (test_bit(Faulty, &rdev->flags)) {
- seq_printf(seq, "(F)");
+ if (test_bit(Timeout, &rdev->flags))
+ seq_printf(seq, "(T)");
+ else
+ seq_printf(seq, "(F)");
continue;
}
if (rdev->raid_disk < 0)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -213,6 +213,9 @@ enum flag_bits {
* check if there is collision between raid1
* serial bios.
*/
+ Timeout, /* Device fault due to timeout.
+ * 'Faulty' is required to be set.
+ */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -356,6 +356,7 @@ static void raid10_end_read_request(stru
slot = r10_bio->read_slot;
rdev = r10_bio->devs[slot].rdev;
+ r10_bio->devs[slot].error = bio->bi_status;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -446,6 +447,7 @@ static void raid10_end_write_request(str
repl = 0;
rdev = conf->mirrors[dev].rdev;
}
+ r10_bio->devs[slot].error = bio->bi_status;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -476,6 +478,8 @@ static void raid10_end_write_request(str
else {
/* Fail the request */
set_bit(R10BIO_Degraded, &r10_bio->state);
+ if (bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@@ -911,7 +915,11 @@ static void flush_pending_writes(struct
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
- bio_io_error(bio);
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_status = BLK_STS_TIMEOUT;
+ else
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
/* Just ignore it */
@@ -1104,7 +1112,11 @@ static void raid10_unplug(struct blk_plu
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
- bio_io_error(bio);
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_status = BLK_STS_TIMEOUT;
+ else
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
/* Just ignore it */
@@ -2079,6 +2091,9 @@ static void sync_request_write(struct md
} else if (test_bit(FailFast, &rdev->flags)) {
/* Just give up on this device */
md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[i].error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
continue;
}
/* Ok, we need to write this bio, either to correct an
@@ -2342,6 +2357,7 @@ static void fix_read_error(struct r10con
struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
+ int read_error = r10_bio->devs[r10_bio->read_slot].error;
/* still own a reference to this rdev, so it cannot
* have been cleared recently.
@@ -2365,6 +2381,9 @@ static void fix_read_error(struct r10con
pr_notice("md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b);
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ read_error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
@@ -2606,9 +2625,12 @@ static void handle_read_error(struct mdd
freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
- } else
+ } else {
md_error(mddev, rdev);
-
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[slot].error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
rdev_dec_pending(rdev, mddev);
allow_barrier(conf);
r10_bio->state = 0;
@@ -2643,8 +2665,13 @@ static void handle_write_completed(struc
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error ==
+ BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL ||
@@ -2660,8 +2687,13 @@ static void handle_write_completed(struc
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error ==
+ BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
}
put_buf(r10_bio);
@@ -4865,6 +4897,9 @@ static void end_reshape_write(struct bio
if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
}
rdev_dec_pending(rdev, mddev);
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -153,6 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
+ int error;
} devs[];
};
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -89,6 +89,7 @@
* devices available - and don't try to
* correct read errors.
*/
+#define MD_DISK_TIMEOUT 11 /* disk is faulty due to timeout */
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@@ -99,6 +100,7 @@
#define MD_DISK_ROLE_SPARE 0xffff
#define MD_DISK_ROLE_FAULTY 0xfffe
#define MD_DISK_ROLE_JOURNAL 0xfffd
+#define MD_DISK_ROLE_TIMEOUT 0xfff0 /* SUSE-only timed-out */
#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */
typedef struct mdp_device_descriptor_s {