Luis Henriques cdd8e7
From: "Yan, Zheng" <zyan@redhat.com>
Luis Henriques cdd8e7
Date: Fri, 1 Sep 2017 16:53:58 +0800
Luis Henriques cdd8e7
Subject: ceph: ignore wbc->range_{start,end} when write back snapshot data
Luis Henriques cdd8e7
Git-commit: 2a2d927e35dd8dc4faf8fbc211533cf5f8840f5b
Luis Henriques cdd8e7
Patch-mainline: v4.14-rc1
Luis Henriques cdd8e7
References: FATE#324714
Luis Henriques cdd8e7
Luis Henriques cdd8e7
writepages() needs to write dirty pages to OSD in strict order of
Luis Henriques cdd8e7
snapshot context. It must first write dirty pages associated with
Luis Henriques cdd8e7
the oldest snapshot context. In the write range case, dirty pages
Luis Henriques cdd8e7
in the specified range can be associated with newer snapc. They
Luis Henriques cdd8e7
are not writeable until we write all dirty pages associated with
Luis Henriques cdd8e7
the oldest snapc.
Luis Henriques cdd8e7
Luis Henriques cdd8e7
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Luis Henriques cdd8e7
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Luis Henriques cdd8e7
Acked-by: Luis Henriques <lhenriques@suse.com>
Luis Henriques cdd8e7
---
Luis Henriques cdd8e7
 fs/ceph/addr.c |   80 ++++++++++++++++++++++++++++++++-------------------------
Luis Henriques cdd8e7
 1 file changed, 46 insertions(+), 34 deletions(-)
Luis Henriques cdd8e7
Luis Henriques cdd8e7
--- a/fs/ceph/addr.c
Luis Henriques cdd8e7
+++ b/fs/ceph/addr.c
Luis Henriques cdd8e7
@@ -469,6 +469,7 @@ struct ceph_writeback_ctl
Luis Henriques cdd8e7
 	u64 truncate_size;
Luis Henriques cdd8e7
 	u32 truncate_seq;
Luis Henriques cdd8e7
 	bool size_stable;
Luis Henriques cdd8e7
+	bool head_snapc;
Luis Henriques cdd8e7
 };
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 /*
Luis Henriques cdd8e7
@@ -504,6 +505,7 @@ get_oldest_context(struct inode *inode,
Luis Henriques cdd8e7
 			}
Luis Henriques cdd8e7
 			ctl->truncate_size = capsnap->truncate_size;
Luis Henriques cdd8e7
 			ctl->truncate_seq = capsnap->truncate_seq;
Luis Henriques cdd8e7
+			ctl->head_snapc = false;
Luis Henriques cdd8e7
 		}
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 		if (snapc)
Luis Henriques cdd8e7
@@ -524,6 +526,7 @@ get_oldest_context(struct inode *inode,
Luis Henriques cdd8e7
 			ctl->truncate_size = ci->i_truncate_size;
Luis Henriques cdd8e7
 			ctl->truncate_seq = ci->i_truncate_seq;
Luis Henriques cdd8e7
 			ctl->size_stable = false;
Luis Henriques cdd8e7
+			ctl->head_snapc = true;
Luis Henriques cdd8e7
 		}
Luis Henriques cdd8e7
 	}
Luis Henriques cdd8e7
 	spin_unlock(&ci->i_ceph_lock);
Luis Henriques cdd8e7
@@ -781,7 +784,7 @@ static int ceph_writepages_start(struct
Luis Henriques cdd8e7
 	struct ceph_inode_info *ci = ceph_inode(inode);
Luis Henriques cdd8e7
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
Luis Henriques cdd8e7
 	struct ceph_vino vino = ceph_vino(inode);
Luis Henriques cdd8e7
-	pgoff_t index, start_index, end;
Luis Henriques cdd8e7
+	pgoff_t index, start_index, end = -1;
Luis Henriques cdd8e7
 	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
Luis Henriques cdd8e7
 	struct pagevec pvec;
Luis Henriques cdd8e7
 	int rc = 0;
Luis Henriques cdd8e7
@@ -810,25 +813,10 @@ static int ceph_writepages_start(struct
Jeff Mahoney 1fd8e1
 	pagevec_init(&pvec, 0);
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 	start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
Luis Henriques cdd8e7
-
Luis Henriques cdd8e7
-	/* where to start/end? */
Luis Henriques cdd8e7
-	if (wbc->range_cyclic) {
Luis Henriques cdd8e7
-		index = start_index;
Luis Henriques cdd8e7
-		end = -1;
Luis Henriques cdd8e7
-		should_loop = (index > 0);
Luis Henriques cdd8e7
-		dout(" cyclic, start at %lu\n", index);
Luis Henriques cdd8e7
-	} else {
Luis Henriques cdd8e7
-		index = wbc->range_start >> PAGE_SHIFT;
Luis Henriques cdd8e7
-		end = wbc->range_end >> PAGE_SHIFT;
Luis Henriques cdd8e7
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
Luis Henriques cdd8e7
-			range_whole = true;
Luis Henriques cdd8e7
-		should_loop = false;
Luis Henriques cdd8e7
-		dout(" not cyclic, %lu to %lu\n", index, end);
Luis Henriques cdd8e7
-	}
Luis Henriques cdd8e7
+	index = start_index;
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 retry:
Luis Henriques cdd8e7
 	/* find oldest snap context with dirty data */
Luis Henriques cdd8e7
-	ceph_put_snap_context(snapc);
Luis Henriques cdd8e7
 	snapc = get_oldest_context(inode, &ceph_wbc, NULL);
Luis Henriques cdd8e7
 	if (!snapc) {
Luis Henriques cdd8e7
 		/* hmm, why does writepages get called when there
Luis Henriques cdd8e7
@@ -839,13 +827,33 @@ retry:
Luis Henriques cdd8e7
 	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
Luis Henriques cdd8e7
 	     snapc, snapc->seq, snapc->num_snaps);
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
-	if (last_snapc && snapc != last_snapc) {
Luis Henriques cdd8e7
-		/* if we switched to a newer snapc, restart our scan at the
Luis Henriques cdd8e7
-		 * start of the original file range. */
Luis Henriques cdd8e7
-		dout("  snapc differs from last pass, restarting at %lu\n",
Luis Henriques cdd8e7
-		     index);
Luis Henriques cdd8e7
-		index = start;
Luis Henriques cdd8e7
+	should_loop = false;
Luis Henriques cdd8e7
+	if (ceph_wbc.head_snapc && snapc != last_snapc) {
Luis Henriques cdd8e7
+		/* where to start/end? */
Luis Henriques cdd8e7
+		if (wbc->range_cyclic) {
Luis Henriques cdd8e7
+			index = start_index;
Luis Henriques cdd8e7
+			end = -1;
Luis Henriques cdd8e7
+			if (index > 0)
Luis Henriques cdd8e7
+				should_loop = true;
Luis Henriques cdd8e7
+			dout(" cyclic, start at %lu\n", index);
Luis Henriques cdd8e7
+		} else {
Luis Henriques cdd8e7
+			index = wbc->range_start >> PAGE_SHIFT;
Luis Henriques cdd8e7
+			end = wbc->range_end >> PAGE_SHIFT;
Luis Henriques cdd8e7
+			if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
Luis Henriques cdd8e7
+				range_whole = true;
Luis Henriques cdd8e7
+			dout(" not cyclic, %lu to %lu\n", index, end);
Luis Henriques cdd8e7
+		}
Luis Henriques cdd8e7
+	} else if (!ceph_wbc.head_snapc) {
Luis Henriques cdd8e7
+		/* Do not respect wbc->range_{start,end}. Dirty pages
Luis Henriques cdd8e7
+		 * in that range can be associated with newer snapc.
Luis Henriques cdd8e7
+		 * They are not writeable until we write all dirty pages
Luis Henriques cdd8e7
+		 * associated with 'snapc'. */
Luis Henriques cdd8e7
+		if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
Luis Henriques cdd8e7
+			should_loop = true;
Luis Henriques cdd8e7
+		dout(" non-head snapc, range whole\n");
Luis Henriques cdd8e7
 	}
Luis Henriques cdd8e7
+
Luis Henriques cdd8e7
+	ceph_put_snap_context(last_snapc);
Luis Henriques cdd8e7
 	last_snapc = snapc;
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 	stop = false;
Luis Henriques cdd8e7
@@ -891,7 +899,9 @@ get_more_pages:
Luis Henriques cdd8e7
 				dout("end of range %p\n", page);
Luis Henriques cdd8e7
 				/* can't be range_cyclic (1st pass) because
Luis Henriques cdd8e7
 				 * end == -1 in that case. */
Luis Henriques cdd8e7
-				stop = done = true;
Luis Henriques cdd8e7
+				stop = true;
Luis Henriques cdd8e7
+				if (ceph_wbc.head_snapc)
Luis Henriques cdd8e7
+					done = true;
Luis Henriques cdd8e7
 				unlock_page(page);
Luis Henriques cdd8e7
 				break;
Luis Henriques cdd8e7
 			}
Luis Henriques cdd8e7
@@ -1136,24 +1146,26 @@ new_request:
Luis Henriques cdd8e7
 		if (pages)
Luis Henriques cdd8e7
 			goto new_request;
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
-		if (wbc->nr_to_write <= 0)
Luis Henriques cdd8e7
-			stop = done = true;
Luis Henriques cdd8e7
+		/*
Luis Henriques cdd8e7
+		 * We stop writing back only if we are not doing
Luis Henriques cdd8e7
+		 * integrity sync. In case of integrity sync we have to
Luis Henriques cdd8e7
+		 * keep going until we have written all the pages
Luis Henriques cdd8e7
+		 * we tagged for writeback prior to entering this loop.
Luis Henriques cdd8e7
+		 */
Luis Henriques cdd8e7
+		if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
Luis Henriques cdd8e7
+			done = stop = true;
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 release_pvec_pages:
Luis Henriques cdd8e7
 		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
Luis Henriques cdd8e7
 		     pvec.nr ? pvec.pages[0] : NULL);
Luis Henriques cdd8e7
 		pagevec_release(&pvec);
Luis Henriques cdd8e7
-
Luis Henriques cdd8e7
-		if (locked_pages && !done)
Luis Henriques cdd8e7
-			goto retry;
Luis Henriques cdd8e7
 	}
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 	if (should_loop && !done) {
Luis Henriques cdd8e7
 		/* more to do; loop back to beginning of file */
Luis Henriques cdd8e7
 		dout("writepages looping back to beginning of file\n");
Luis Henriques cdd8e7
-		should_loop = false;
Luis Henriques cdd8e7
-		end = start_index - 1;
Luis Henriques cdd8e7
-
Luis Henriques cdd8e7
+		end = start_index - 1; /* OK even when start_index == 0 */
Luis Henriques cdd8e7
+		start_index = 0;
Luis Henriques cdd8e7
 		index = 0;
Luis Henriques cdd8e7
 		goto retry;
Luis Henriques cdd8e7
 	}
Luis Henriques cdd8e7
@@ -1163,8 +1175,8 @@ release_pvec_pages:
Luis Henriques cdd8e7
 
Luis Henriques cdd8e7
 out:
Luis Henriques cdd8e7
 	ceph_osdc_put_request(req);
Luis Henriques cdd8e7
-	ceph_put_snap_context(snapc);
Luis Henriques cdd8e7
-	dout("writepages done, rc = %d\n", rc);
Luis Henriques cdd8e7
+	ceph_put_snap_context(last_snapc);
Luis Henriques cdd8e7
+	dout("writepages dend - startone, rc = %d\n", rc);
Luis Henriques cdd8e7
 	return rc;
Luis Henriques cdd8e7
 }
Luis Henriques cdd8e7