|
Luis Henriques |
cdd8e7 |
From: "Yan, Zheng" <zyan@redhat.com>
|
|
Luis Henriques |
cdd8e7 |
Date: Fri, 1 Sep 2017 16:53:58 +0800
|
|
Luis Henriques |
cdd8e7 |
Subject: ceph: ignore wbc->range_{start,end} when write back snapshot data
|
|
Luis Henriques |
cdd8e7 |
Git-commit: 2a2d927e35dd8dc4faf8fbc211533cf5f8840f5b
|
|
Luis Henriques |
cdd8e7 |
Patch-mainline: v4.14-rc1
|
|
Luis Henriques |
cdd8e7 |
References: FATE#324714
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
writepages() needs to write dirty pages to OSD in strict order of
|
|
Luis Henriques |
cdd8e7 |
snapshot context. It must first write dirty pages associated with
|
|
Luis Henriques |
cdd8e7 |
the oldest snapshot context. In the write range case, dirty pages
|
|
Luis Henriques |
cdd8e7 |
in the specified range can be associated with newer snapc. They
|
|
Luis Henriques |
cdd8e7 |
are not writeable until we write all dirty pages associated with
|
|
Luis Henriques |
cdd8e7 |
the oldest snapc.
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
|
|
Luis Henriques |
cdd8e7 |
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
|
|
Luis Henriques |
cdd8e7 |
Acked-by: Luis Henriques <lhenriques@suse.com>
|
|
Luis Henriques |
cdd8e7 |
---
|
|
Luis Henriques |
cdd8e7 |
fs/ceph/addr.c | 80 ++++++++++++++++++++++++++++++++-------------------------
|
|
Luis Henriques |
cdd8e7 |
1 file changed, 46 insertions(+), 34 deletions(-)
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
--- a/fs/ceph/addr.c
|
|
Luis Henriques |
cdd8e7 |
+++ b/fs/ceph/addr.c
|
|
Luis Henriques |
cdd8e7 |
@@ -469,6 +469,7 @@ struct ceph_writeback_ctl
|
|
Luis Henriques |
cdd8e7 |
u64 truncate_size;
|
|
Luis Henriques |
cdd8e7 |
u32 truncate_seq;
|
|
Luis Henriques |
cdd8e7 |
bool size_stable;
|
|
Luis Henriques |
cdd8e7 |
+ bool head_snapc;
|
|
Luis Henriques |
cdd8e7 |
};
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
/*
|
|
Luis Henriques |
cdd8e7 |
@@ -504,6 +505,7 @@ get_oldest_context(struct inode *inode,
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
ctl->truncate_size = capsnap->truncate_size;
|
|
Luis Henriques |
cdd8e7 |
ctl->truncate_seq = capsnap->truncate_seq;
|
|
Luis Henriques |
cdd8e7 |
+ ctl->head_snapc = false;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
if (snapc)
|
|
Luis Henriques |
cdd8e7 |
@@ -524,6 +526,7 @@ get_oldest_context(struct inode *inode,
|
|
Luis Henriques |
cdd8e7 |
ctl->truncate_size = ci->i_truncate_size;
|
|
Luis Henriques |
cdd8e7 |
ctl->truncate_seq = ci->i_truncate_seq;
|
|
Luis Henriques |
cdd8e7 |
ctl->size_stable = false;
|
|
Luis Henriques |
cdd8e7 |
+ ctl->head_snapc = true;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
spin_unlock(&ci->i_ceph_lock);
|
|
Luis Henriques |
cdd8e7 |
@@ -781,7 +784,7 @@ static int ceph_writepages_start(struct
|
|
Luis Henriques |
cdd8e7 |
struct ceph_inode_info *ci = ceph_inode(inode);
|
|
Luis Henriques |
cdd8e7 |
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
|
Luis Henriques |
cdd8e7 |
struct ceph_vino vino = ceph_vino(inode);
|
|
Luis Henriques |
cdd8e7 |
- pgoff_t index, start_index, end;
|
|
Luis Henriques |
cdd8e7 |
+ pgoff_t index, start_index, end = -1;
|
|
Luis Henriques |
cdd8e7 |
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
|
|
Luis Henriques |
cdd8e7 |
struct pagevec pvec;
|
|
Luis Henriques |
cdd8e7 |
int rc = 0;
|
|
Luis Henriques |
cdd8e7 |
@@ -810,25 +813,10 @@ static int ceph_writepages_start(struct
|
|
Jeff Mahoney |
1fd8e1 |
pagevec_init(&pvec, 0);
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
|
|
Luis Henriques |
cdd8e7 |
-
|
|
Luis Henriques |
cdd8e7 |
- /* where to start/end? */
|
|
Luis Henriques |
cdd8e7 |
- if (wbc->range_cyclic) {
|
|
Luis Henriques |
cdd8e7 |
- index = start_index;
|
|
Luis Henriques |
cdd8e7 |
- end = -1;
|
|
Luis Henriques |
cdd8e7 |
- should_loop = (index > 0);
|
|
Luis Henriques |
cdd8e7 |
- dout(" cyclic, start at %lu\n", index);
|
|
Luis Henriques |
cdd8e7 |
- } else {
|
|
Luis Henriques |
cdd8e7 |
- index = wbc->range_start >> PAGE_SHIFT;
|
|
Luis Henriques |
cdd8e7 |
- end = wbc->range_end >> PAGE_SHIFT;
|
|
Luis Henriques |
cdd8e7 |
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
|
Luis Henriques |
cdd8e7 |
- range_whole = true;
|
|
Luis Henriques |
cdd8e7 |
- should_loop = false;
|
|
Luis Henriques |
cdd8e7 |
- dout(" not cyclic, %lu to %lu\n", index, end);
|
|
Luis Henriques |
cdd8e7 |
- }
|
|
Luis Henriques |
cdd8e7 |
+ index = start_index;
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
retry:
|
|
Luis Henriques |
cdd8e7 |
/* find oldest snap context with dirty data */
|
|
Luis Henriques |
cdd8e7 |
- ceph_put_snap_context(snapc);
|
|
Luis Henriques |
cdd8e7 |
snapc = get_oldest_context(inode, &ceph_wbc, NULL);
|
|
Luis Henriques |
cdd8e7 |
if (!snapc) {
|
|
Luis Henriques |
cdd8e7 |
/* hmm, why does writepages get called when there
|
|
Luis Henriques |
cdd8e7 |
@@ -839,13 +827,33 @@ retry:
|
|
Luis Henriques |
cdd8e7 |
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
|
|
Luis Henriques |
cdd8e7 |
snapc, snapc->seq, snapc->num_snaps);
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
- if (last_snapc && snapc != last_snapc) {
|
|
Luis Henriques |
cdd8e7 |
- /* if we switched to a newer snapc, restart our scan at the
|
|
Luis Henriques |
cdd8e7 |
- * start of the original file range. */
|
|
Luis Henriques |
cdd8e7 |
- dout(" snapc differs from last pass, restarting at %lu\n",
|
|
Luis Henriques |
cdd8e7 |
- index);
|
|
Luis Henriques |
cdd8e7 |
- index = start;
|
|
Luis Henriques |
cdd8e7 |
+ should_loop = false;
|
|
Luis Henriques |
cdd8e7 |
+ if (ceph_wbc.head_snapc && snapc != last_snapc) {
|
|
Luis Henriques |
cdd8e7 |
+ /* where to start/end? */
|
|
Luis Henriques |
cdd8e7 |
+ if (wbc->range_cyclic) {
|
|
Luis Henriques |
cdd8e7 |
+ index = start_index;
|
|
Luis Henriques |
cdd8e7 |
+ end = -1;
|
|
Luis Henriques |
cdd8e7 |
+ if (index > 0)
|
|
Luis Henriques |
cdd8e7 |
+ should_loop = true;
|
|
Luis Henriques |
cdd8e7 |
+ dout(" cyclic, start at %lu\n", index);
|
|
Luis Henriques |
cdd8e7 |
+ } else {
|
|
Luis Henriques |
cdd8e7 |
+ index = wbc->range_start >> PAGE_SHIFT;
|
|
Luis Henriques |
cdd8e7 |
+ end = wbc->range_end >> PAGE_SHIFT;
|
|
Luis Henriques |
cdd8e7 |
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
|
Luis Henriques |
cdd8e7 |
+ range_whole = true;
|
|
Luis Henriques |
cdd8e7 |
+ dout(" not cyclic, %lu to %lu\n", index, end);
|
|
Luis Henriques |
cdd8e7 |
+ }
|
|
Luis Henriques |
cdd8e7 |
+ } else if (!ceph_wbc.head_snapc) {
|
|
Luis Henriques |
cdd8e7 |
+ /* Do not respect wbc->range_{start,end}. Dirty pages
|
|
Luis Henriques |
cdd8e7 |
+ * in that range can be associated with newer snapc.
|
|
Luis Henriques |
cdd8e7 |
+ * They are not writeable until all dirty pages
|
|
Luis Henriques |
cdd8e7 |
+ * associated with 'snapc' get written */
|
|
Luis Henriques |
cdd8e7 |
+ if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
|
|
Luis Henriques |
cdd8e7 |
+ should_loop = true;
|
|
Luis Henriques |
cdd8e7 |
+ dout(" non-head snapc, range whole\n");
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
+
|
|
Luis Henriques |
cdd8e7 |
+ ceph_put_snap_context(last_snapc);
|
|
Luis Henriques |
cdd8e7 |
last_snapc = snapc;
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
stop = false;
|
|
Luis Henriques |
cdd8e7 |
@@ -891,7 +899,9 @@ get_more_pages:
|
|
Luis Henriques |
cdd8e7 |
dout("end of range %p\n", page);
|
|
Luis Henriques |
cdd8e7 |
/* can't be range_cyclic (1st pass) because
|
|
Luis Henriques |
cdd8e7 |
* end == -1 in that case. */
|
|
Luis Henriques |
cdd8e7 |
- stop = done = true;
|
|
Luis Henriques |
cdd8e7 |
+ stop = true;
|
|
Luis Henriques |
cdd8e7 |
+ if (ceph_wbc.head_snapc)
|
|
Luis Henriques |
cdd8e7 |
+ done = true;
|
|
Luis Henriques |
cdd8e7 |
unlock_page(page);
|
|
Luis Henriques |
cdd8e7 |
break;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
@@ -1136,24 +1146,26 @@ new_request:
|
|
Luis Henriques |
cdd8e7 |
if (pages)
|
|
Luis Henriques |
cdd8e7 |
goto new_request;
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
- if (wbc->nr_to_write <= 0)
|
|
Luis Henriques |
cdd8e7 |
- stop = done = true;
|
|
Luis Henriques |
cdd8e7 |
+ /*
|
|
Luis Henriques |
cdd8e7 |
+ * We stop writing back only if we are not doing
|
|
Luis Henriques |
cdd8e7 |
+ * integrity sync. In case of integrity sync we have to
|
|
Luis Henriques |
cdd8e7 |
+ * keep going until we have written all the pages
|
|
Luis Henriques |
cdd8e7 |
+ * we tagged for writeback prior to entering this loop.
|
|
Luis Henriques |
cdd8e7 |
+ */
|
|
Luis Henriques |
cdd8e7 |
+ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
|
|
Luis Henriques |
cdd8e7 |
+ done = stop = true;
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
release_pvec_pages:
|
|
Luis Henriques |
cdd8e7 |
dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
|
|
Luis Henriques |
cdd8e7 |
pvec.nr ? pvec.pages[0] : NULL);
|
|
Luis Henriques |
cdd8e7 |
pagevec_release(&pvec);
|
|
Luis Henriques |
cdd8e7 |
-
|
|
Luis Henriques |
cdd8e7 |
- if (locked_pages && !done)
|
|
Luis Henriques |
cdd8e7 |
- goto retry;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
if (should_loop && !done) {
|
|
Luis Henriques |
cdd8e7 |
/* more to do; loop back to beginning of file */
|
|
Luis Henriques |
cdd8e7 |
dout("writepages looping back to beginning of file\n");
|
|
Luis Henriques |
cdd8e7 |
- should_loop = false;
|
|
Luis Henriques |
cdd8e7 |
- end = start_index - 1;
|
|
Luis Henriques |
cdd8e7 |
-
|
|
Luis Henriques |
cdd8e7 |
+ end = start_index - 1; /* OK even when start_index == 0 */
|
|
Luis Henriques |
cdd8e7 |
+ start_index = 0;
|
|
Luis Henriques |
cdd8e7 |
index = 0;
|
|
Luis Henriques |
cdd8e7 |
goto retry;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
@@ -1163,8 +1175,8 @@ release_pvec_pages:
|
|
Luis Henriques |
cdd8e7 |
|
|
Luis Henriques |
cdd8e7 |
out:
|
|
Luis Henriques |
cdd8e7 |
ceph_osdc_put_request(req);
|
|
Luis Henriques |
cdd8e7 |
- ceph_put_snap_context(snapc);
|
|
Luis Henriques |
cdd8e7 |
- dout("writepages done, rc = %d\n", rc);
|
|
Luis Henriques |
cdd8e7 |
+ ceph_put_snap_context(last_snapc);
|
|
Luis Henriques |
cdd8e7 |
+ dout("writepages done, rc = %d\n", rc);
|
|
Luis Henriques |
cdd8e7 |
return rc;
|
|
Luis Henriques |
cdd8e7 |
}
|
|
Luis Henriques |
cdd8e7 |
|