Blob Blame History Raw
From: Hannes Reinecke <hare@suse.com>
Date: Thu, 12 Jul 2012 08:36:52 +0200
Subject: md: display timeout errors in /etc/mdstat etc
Patch-mainline: Not yet, failfast is poorly defined
References: bnc#763402

Track whether a device failed due to a timeout or some other reason.
If due to a timeout, set a flag so that it can be reported.

Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
---
 drivers/md/md.c                |   44 +++++++++++++++++++++++++++++++-------
 drivers/md/md.h                |    3 ++
 drivers/md/raid10.c            |   47 +++++++++++++++++++++++++++++++++++------
 drivers/md/raid10.h            |    1 
 include/uapi/linux/raid/md_p.h |    2 +
 5 files changed, 83 insertions(+), 14 deletions(-)

--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -865,8 +865,10 @@ static void super_written(struct bio *bi
 	if (bio->bi_status) {
 		pr_err("md: super_written gets error=%d\n", bio->bi_status);
 		md_error(mddev, rdev);
-		if (!test_bit(Faulty, &rdev->flags)
-		    && (bio->bi_opf & MD_FAILFAST)) {
+		if (!test_bit(Faulty, &rdev->flags)) {
+			if (bio->bi_status == BLK_STS_TIMEOUT)
+				set_bit(Timeout, &rdev->flags);
+		} else if (bio->bi_opf & MD_FAILFAST) {
 			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
 			set_bit(LastDev, &rdev->flags);
 		}
@@ -1231,6 +1233,7 @@ static int super_90_validate(struct mdde
 
 	rdev->raid_disk = -1;
 	clear_bit(Faulty, &rdev->flags);
+	clear_bit(Timeout, &rdev->flags);
 	clear_bit(In_sync, &rdev->flags);
 	clear_bit(Bitmap_sync, &rdev->flags);
 	clear_bit(WriteMostly, &rdev->flags);
@@ -1746,6 +1749,7 @@ static int super_1_validate(struct mddev
 
 	rdev->raid_disk = -1;
 	clear_bit(Faulty, &rdev->flags);
+	clear_bit(Timeout, &rdev->flags);
 	clear_bit(In_sync, &rdev->flags);
 	clear_bit(Bitmap_sync, &rdev->flags);
 	clear_bit(WriteMostly, &rdev->flags);
@@ -1872,6 +1876,9 @@ static int super_1_validate(struct mddev
 			break;
 		case MD_DISK_ROLE_JOURNAL: /* journal device */
 			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
+				/* probably legacy 'timed-out' device */
+				if (mddev->level == 10 || mddev->level == 1)
+					goto timeout;
 				/* journal device without journal feature */
 				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
 				return -EINVAL;
@@ -1880,6 +1887,11 @@ static int super_1_validate(struct mddev
 			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
 			rdev->raid_disk = 0;
 			break;
+		case MD_DISK_ROLE_TIMEOUT: /* faulty, timeout */
+		timeout:
+			set_bit(Faulty, &rdev->flags);
+			set_bit(Timeout, &rdev->flags);
+			break;
 		default:
 			rdev->saved_raid_disk = role;
 			if ((le32_to_cpu(sb->feature_map) &
@@ -2065,9 +2077,12 @@ retry:
 
 	rdev_for_each(rdev2, mddev) {
 		i = rdev2->desc_nr;
-		if (test_bit(Faulty, &rdev2->flags))
-			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
-		else if (test_bit(In_sync, &rdev2->flags))
+		if (test_bit(Faulty, &rdev2->flags)) {
+			if (test_bit(Timeout, &rdev2->flags))
+				sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_TIMEOUT);
+			else
+				sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
+		} else if (test_bit(In_sync, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 		else if (test_bit(Journal, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
@@ -2824,6 +2839,8 @@ state_show(struct md_rdev *rdev, char *p
 	    (!test_bit(ExternalBbl, &flags) &&
 	    rdev->badblocks.unacked_exist))
 		len += sprintf(page+len, "faulty%s", sep);
+	if (test_bit(Timeout, &flags))
+		len += sprintf(page+len, "timeout%s", sep);
 	if (test_bit(In_sync, &flags))
 		len += sprintf(page+len, "in_sync%s", sep);
 	if (test_bit(Journal, &flags))
@@ -2879,6 +2896,11 @@ state_store(struct md_rdev *rdev, const
 			err = 0;
 		else
 			err = -EBUSY;
+	} else if (cmd_match(buf, "timeout") && rdev->mddev->pers) {
+		md_error(rdev->mddev, rdev);
+		if (test_bit(Faulty, &rdev->flags))
+			set_bit(Timeout, &rdev->flags);
+		err = 0;
 	} else if (cmd_match(buf, "remove")) {
 		if (rdev->mddev->pers) {
 			clear_bit(Blocked, &rdev->flags);
@@ -3130,6 +3152,7 @@ slot_store(struct md_rdev *rdev, const c
 		rdev->raid_disk = slot;
 		/* assume it is working */
 		clear_bit(Faulty, &rdev->flags);
+		clear_bit(Timeout, &rdev->flags);
 		clear_bit(WriteMostly, &rdev->flags);
 		set_bit(In_sync, &rdev->flags);
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -6498,9 +6521,11 @@ static int get_disk_info(struct mddev *m
 		info.minor = MINOR(rdev->bdev->bd_dev);
 		info.raid_disk = rdev->raid_disk;
 		info.state = 0;
-		if (test_bit(Faulty, &rdev->flags))
+		if (test_bit(Faulty, &rdev->flags)) {
 			info.state |= (1<<MD_DISK_FAULTY);
-		else if (test_bit(In_sync, &rdev->flags)) {
+			if (test_bit(Timeout, &rdev->flags))
+				info.state |= (1<<MD_DISK_TIMEOUT);
+		} else if (test_bit(In_sync, &rdev->flags)) {
 			info.state |= (1<<MD_DISK_ACTIVE);
 			info.state |= (1<<MD_DISK_SYNC);
 		}
@@ -8076,7 +8101,10 @@ static int md_seq_show(struct seq_file *
 			if (test_bit(Journal, &rdev->flags))
 				seq_printf(seq, "(J)");
 			if (test_bit(Faulty, &rdev->flags)) {
-				seq_printf(seq, "(F)");
+				if (test_bit(Timeout, &rdev->flags))
+					seq_printf(seq, "(T)");
+				else
+					seq_printf(seq, "(F)");
 				continue;
 			}
 			if (rdev->raid_disk < 0)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -213,6 +213,9 @@ enum flag_bits {
 				 * check if there is collision between raid1
 				 * serial bios.
 				 */
+	Timeout,		/* Device fault due to timeout.
+				 * 'Faulty' is required to be set.
+				 */
 };
 
 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -395,6 +395,7 @@ static void raid10_end_read_request(stru
 
 	slot = r10_bio->read_slot;
 	rdev = r10_bio->devs[slot].rdev;
+	r10_bio->devs[slot].error = bio->bi_status;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
@@ -485,6 +486,7 @@ static void raid10_end_write_request(str
 		repl = 0;
 		rdev = conf->mirrors[dev].rdev;
 	}
+	r10_bio->devs[slot].error = bio->bi_status;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
@@ -515,6 +517,8 @@ static void raid10_end_write_request(str
 			if (!test_bit(Faulty, &rdev->flags))
 				set_bit(R10BIO_WriteError, &r10_bio->state);
 			else {
+				if (bio->bi_status == BLK_STS_TIMEOUT)
+					set_bit(Timeout, &rdev->flags);
 				r10_bio->devs[slot].bio = NULL;
 				to_put = bio;
 				dec_rdev = 1;
@@ -950,7 +954,11 @@ static void flush_pending_writes(struct
 			bio->bi_next = NULL;
 			bio_set_dev(bio, rdev->bdev);
 			if (test_bit(Faulty, &rdev->flags)) {
-				bio_io_error(bio);
+				if (test_bit(Timeout, &rdev->flags))
+					bio->bi_status = BLK_STS_TIMEOUT;
+				else
+					bio->bi_status = BLK_STS_IOERR;
+				bio_endio(bio);
 			} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 					    !blk_queue_discard(bio->bi_disk->queue)))
 				/* Just ignore it */
@@ -1135,7 +1143,11 @@ static void raid10_unplug(struct blk_plu
 		bio->bi_next = NULL;
 		bio_set_dev(bio, rdev->bdev);
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio_io_error(bio);
+			if (test_bit(Timeout, &rdev->flags))
+				bio->bi_status = BLK_STS_TIMEOUT;
+			else
+				bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bio->bi_disk->queue)))
 			/* Just ignore it */
@@ -2112,6 +2124,9 @@ static void sync_request_write(struct md
 		} else if (test_bit(FailFast, &rdev->flags)) {
 			/* Just give up on this device */
 			md_error(rdev->mddev, rdev);
+			if (test_bit(Faulty, &rdev->flags) &&
+			    r10_bio->devs[i].error == BLK_STS_TIMEOUT)
+				set_bit(Timeout, &rdev->flags);
 			continue;
 		}
 		/* Ok, we need to write this bio, either to correct an
@@ -2375,6 +2390,7 @@ static void fix_read_error(struct r10con
 	struct md_rdev *rdev;
 	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[r10_bio->read_slot].devnum;
+	int read_error = r10_bio->devs[r10_bio->read_slot].error;
 
 	/* still own a reference to this rdev, so it cannot
 	 * have been cleared recently.
@@ -2398,6 +2414,9 @@ static void fix_read_error(struct r10con
 		pr_notice("md/raid10:%s: %s: Failing raid device\n",
 			  mdname(mddev), b);
 		md_error(mddev, rdev);
+		if (test_bit(Faulty, &rdev->flags) &&
+		    read_error == BLK_STS_TIMEOUT)
+			set_bit(Timeout, &rdev->flags);
 		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 		return;
 	}
@@ -2639,9 +2658,12 @@ static void handle_read_error(struct mdd
 		freeze_array(conf, 1);
 		fix_read_error(conf, mddev, r10_bio);
 		unfreeze_array(conf);
-	} else
+	} else {
 		md_error(mddev, rdev);
-
+		if (test_bit(Faulty, &rdev->flags) &&
+		    r10_bio->devs[slot].error == BLK_STS_TIMEOUT)
+			set_bit(Timeout, &rdev->flags);
+	}
 	rdev_dec_pending(rdev, mddev);
 	allow_barrier(conf);
 	r10_bio->state = 0;
@@ -2676,8 +2698,13 @@ static void handle_write_completed(struc
 				if (!rdev_set_badblocks(
 					    rdev,
 					    r10_bio->devs[m].addr,
-					    r10_bio->sectors, 0))
+					    r10_bio->sectors, 0)) {
 					md_error(conf->mddev, rdev);
+					if (test_bit(Faulty, &rdev->flags) &&
+					    r10_bio->devs[m].error ==
+					     BLK_STS_TIMEOUT)
+						set_bit(Timeout, &rdev->flags);
+				}
 			}
 			rdev = conf->mirrors[dev].replacement;
 			if (r10_bio->devs[m].repl_bio == NULL ||
@@ -2693,8 +2720,13 @@ static void handle_write_completed(struc
 				if (!rdev_set_badblocks(
 					    rdev,
 					    r10_bio->devs[m].addr,
-					    r10_bio->sectors, 0))
+					    r10_bio->sectors, 0)) {
 					md_error(conf->mddev, rdev);
+					if (test_bit(Faulty, &rdev->flags) &&
+					    r10_bio->devs[m].error ==
+					     BLK_STS_TIMEOUT)
+						set_bit(Timeout, &rdev->flags);
+				}
 			}
 		}
 		put_buf(r10_bio);
@@ -4900,6 +4932,9 @@ static void end_reshape_write(struct bio
 	if (bio->bi_status) {
 		/* FIXME should record badblock */
 		md_error(mddev, rdev);
+		if (test_bit(Faulty, &rdev->flags) &&
+		    bio->bi_status == BLK_STS_TIMEOUT)
+			set_bit(Timeout, &rdev->flags);
 	}
 
 	rdev_dec_pending(rdev, mddev);
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -152,6 +152,7 @@ struct r10bio {
 		};
 		sector_t	addr;
 		int		devnum;
+		int		error;
 	} devs[0];
 };
 
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -88,6 +88,7 @@
 				    * devices available - and don't try to
 				    * correct read errors.
 				    */
+#define	MD_DISK_TIMEOUT		11 /* disk is faulty due to timeout */
 
 #define	MD_DISK_WRITEMOSTLY	9 /* disk is "write-mostly" is RAID1 config.
 				   * read requests will only be sent here in
@@ -98,6 +99,7 @@
 #define MD_DISK_ROLE_SPARE	0xffff
 #define MD_DISK_ROLE_FAULTY	0xfffe
 #define MD_DISK_ROLE_JOURNAL	0xfffd
+#define MD_DISK_ROLE_TIMEOUT	0xfff0 /* SUSE-only timed-out */
 #define MD_DISK_ROLE_MAX	0xff00 /* max value of regular disk role */
 
 typedef struct mdp_device_descriptor_s {