Blob Blame History Raw
From: "Yan, Zheng" <zyan@redhat.com>
Date: Sun, 4 Mar 2018 16:36:01 +0800
Subject: ceph: invalidate pages that beyond EOF in ceph_writepages_start()
Git-commit: af9cc401ce7452f9d965ba4553d8ffe7f0ed42ee
Patch-mainline: v4.17-rc1
References: FATE#324714

Dirty pages can be associated with different capsnap. Different capsnap
may have different EOF value. So invalidating dirty pages according to
the largest EOF value is wrong. Dirty pages beyond EOF, but associated
with other capsnap, do not get invalidated.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
[luis: since we don't have 0ed75fc8d288 ("ceph: use pagevec_lookup_range_tag()")
 in SLE15SP1, the 'stop' flag is still required in ceph_writepages_start() ]
Acked-by: Luis Henriques <lhenriques@suse.com>
---
 fs/ceph/addr.c  |   33 +++++++++++++++++----------------
 fs/ceph/inode.c |   11 -----------
 2 files changed, 17 insertions(+), 27 deletions(-)

--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -864,7 +864,7 @@ retry:
 	last_snapc = snapc;
 
 	stop = false;
-	while (!stop && index <= end) {
+	while (!stop && !done && index <= end) {
 		int num_ops = 0, op_idx;
 		unsigned i, pvec_pages, max_pages, locked_pages = 0;
 		struct page **pages = NULL, **data_pages;
@@ -912,16 +912,26 @@ get_more_pages:
 				unlock_page(page);
 				break;
 			}
-			if (strip_unit_end && (page->index > strip_unit_end)) {
-				dout("end of strip unit %p\n", page);
+			/* only if matching snap context */
+			pgsnapc = page_snap_context(page);
+			if (pgsnapc != snapc) {
+				dout("page snapc %p %lld != oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
 				unlock_page(page);
-				break;
+				continue;
 			}
 			if (page_offset(page) >= ceph_wbc.i_size) {
 				dout("%p page eof %llu\n",
 				     page, ceph_wbc.i_size);
-				/* not done if range_cyclic */
-				stop = true;
+				if (ceph_wbc.size_stable ||
+				    page_offset(page) >= i_size_read(inode))
+					mapping->a_ops->invalidatepage(page,
+								0, PAGE_SIZE);
+				unlock_page(page);
+				continue;
+			}
+			if (strip_unit_end && (page->index > strip_unit_end)) {
+				dout("end of strip unit %p\n", page);
 				unlock_page(page);
 				break;
 			}
@@ -935,15 +945,6 @@ get_more_pages:
 				wait_on_page_writeback(page);
 			}
 
-			/* only if matching snap context */
-			pgsnapc = page_snap_context(page);
-			if (pgsnapc != snapc) {
-				dout("page snapc %p %lld != oldest %p %lld\n",
-				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
-				unlock_page(page);
-				continue;
-			}
-
 			if (!clear_page_dirty_for_io(page)) {
 				dout("%p !clear_page_dirty_for_io\n", page);
 				unlock_page(page);
@@ -1156,7 +1157,7 @@ new_request:
 		 * we tagged for writeback prior to entering this loop.
 		 */
 		if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
-			done = stop = true;
+			done = true;
 
 release_pvec_pages:
 		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1866,20 +1866,9 @@ retry:
 	 * possibly truncate them.. so write AND block!
 	 */
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
-		struct ceph_cap_snap *capsnap;
-		to = ci->i_truncate_size;
-		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-			// MDS should have revoked Frw caps
-			WARN_ON_ONCE(capsnap->writing);
-			if (capsnap->dirty_pages && capsnap->size > to)
-				to = capsnap->size;
-		}
 		spin_unlock(&ci->i_ceph_lock);
 		dout("__do_pending_vmtruncate %p flushing snaps first\n",
 		     inode);
-
-		truncate_pagecache(inode, to);
-
 		filemap_write_and_wait_range(&inode->i_data, 0,
 					     inode->i_sb->s_maxbytes);
 		goto retry;