Blob Blame History Raw
From c339ef610d2a10b68936b26ef83825c11d7c9518 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Mon, 6 Nov 2017 11:48:30 +0800
Subject: [PATCH] md-cluster: Suspend writes in RAID10 if within range
Patch-mainline: Queued in subsystem maintainer repository
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
Git-commit: cb8a7a7e1098e74d36378b992a6d012668ec10d9
References: fate#323171

Commit 6eef4b21ffc9 ("md: add honouring of suspend_{lo,hi}
to raid1.") added suspend_{lo,hi} support to raid1. This commit
does the same for raid10, which allows us to stop writeout to
portions of the array while they are being resynced by someone
else - e.g. another node in a cluster.

Also, if there is a resync going on, all nodes must suspend
writes to the range being resynced. This range is recorded in
suspend_info and suspend_list. If an I/O falls within the range
of any of the suspend_info entries, area_resyncing will return 1.

[Guoqing] This commit differs from mainline because, in
mainline, the suspend_hi/lo handling has been moved into md
core code and quiesce(.., 2) has been removed.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index cc9e16c848ae..c3ba4cfc6068 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -25,6 +25,7 @@
 #include <linux/seq_file.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
+#include <linux/sched/signal.h>
 #include <trace/events/block.h>
 #include "md.h"
 #include "raid10.h"
@@ -1299,6 +1300,36 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	sector_t sectors;
 	int max_sectors;
 
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
+	     bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+	    (mddev_is_clustered(mddev) &&
+	     md_cluster_ops->area_resyncing(mddev, WRITE,
+					    bio->bi_iter.bi_sector,
+					    bio_end_sector(bio)))) {
+		/*
+		 * As the suspend_* range is controlled by
+		 * userspace, we want an interruptible wait.
+		 */
+		DEFINE_WAIT(w);
+		for (;;) {
+			sigset_t full, old;
+
+			prepare_to_wait(&conf->wait_barrier, &w,
+					TASK_INTERRUPTIBLE);
+			if ((bio_end_sector(bio) <= mddev->suspend_lo ||
+			     bio->bi_iter.bi_sector >= mddev->suspend_hi) &&
+			    (!mddev_is_clustered(mddev) ||
+			     !md_cluster_ops->area_resyncing(mddev, WRITE,
+				 bio->bi_iter.bi_sector, bio_end_sector(bio))))
+				break;
+			sigfillset(&full);
+			sigprocmask(SIG_BLOCK, &full, &old);
+			schedule();
+			sigprocmask(SIG_SETMASK, &old, NULL);
+		}
+		finish_wait(&conf->wait_barrier, &w);
+	}
+
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
@@ -3953,6 +3984,9 @@ static void raid10_quiesce(struct mddev *mddev, int state)
 	struct r10conf *conf = mddev->private;
 
 	switch(state) {
+	case 2: /* wake for suspend */
+		wake_up(&conf->wait_barrier);
+		break;
 	case 1:
 		raise_barrier(conf, 0);
 		break;
-- 
2.10.0