From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 12 Oct 2020 09:39:06 -0400
Subject: ceph: check session state after bumping session->s_seq
Git-commit: 62575e270f661aba64778cbc5f354511cf9abb21
Patch-mainline: v5.10-rc3
References: bsc#1179259
Some messages sent by the MDS entail a session sequence number
increment, and the MDS will drop certain types of requests on the floor
when the sequence numbers don't match.
In particular, a REQUEST_CLOSE message can cross with one of the
sequence morphing messages from the MDS which can cause the client to
stall, waiting for a response that will never come.
Originally, this meant an up to 5s delay before the recurring workqueue
job kicked in and resent the request, but a recent change made it so
that the client would never resend, causing a 60s stall unmounting and
sometimes a blockisting event.
Add a new helper for incrementing the session sequence and then testing
to see whether a REQUEST_CLOSE needs to be resent, and move the handling
of CEPH_MDS_SESSION_CLOSING into that function. Change all of the
bare sequence counter increments to use the new helper.
Reorganize check_session_state with a switch statement. It should no
longer be called when the session is CLOSING, so throw a warning if it
ever is (but still handle that case sanely).
[ idryomov: whitespace, pr_err() call fixup ]
URL: https://tracker.ceph.com/issues/47563
Fixes: fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
Reported-by: Patrick Donnelly <pdonnell@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
[Luis:
- replaced 'fallthrough' by comment in switch since we don't have 294f69e662d1
("compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use")
- also dropped references to CEPH_MDS_SESSION_CLOSED as we don't have
4d681c2f9141 ("ceph: keep the session state until it is released")]
Acked-by: Luis Henriques <lhenriques@suse.com>
---
fs/ceph/caps.c | 2 +-
fs/ceph/mds_client.c | 48 ++++++++++++++++++++++++++++++++++--------------
fs/ceph/mds_client.h | 1 +
fs/ceph/quota.c | 2 +-
fs/ceph/snap.c | 2 +-
5 files changed, 38 insertions(+), 17 deletions(-)
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3901,7 +3901,7 @@ void ceph_handle_caps(struct ceph_mds_se
vino.snap, inode);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3651,7 +3651,7 @@ static void handle_lease(struct ceph_mds
dname.len, dname.name);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
if (!inode) {
dout("handle_lease no inode %llx\n", vino.ino);
@@ -3780,28 +3780,48 @@ static void lock_unlock_sessions(struct
bool check_session_state(struct ceph_mds_session *s)
{
- if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
- dout("resending session close request for mds%d\n",
- s->s_mds);
- request_close_session(s);
- return false;
- }
- if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
- if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+ switch (s->s_state) {
+ case CEPH_MDS_SESSION_OPEN:
+ if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
s->s_state = CEPH_MDS_SESSION_HUNG;
pr_info("mds%d hung\n", s->s_mds);
}
- }
- if (s->s_state == CEPH_MDS_SESSION_NEW ||
- s->s_state == CEPH_MDS_SESSION_RESTARTING ||
- s->s_state == CEPH_MDS_SESSION_REJECTED)
- /* this mds is failed or recovering, just wait */
+ break;
+ case CEPH_MDS_SESSION_CLOSING:
+ /* Should never reach this when we're unmounting */
+ WARN_ON_ONCE(true);
+ /* fallthrough */
+ case CEPH_MDS_SESSION_NEW:
+ case CEPH_MDS_SESSION_RESTARTING:
+ case CEPH_MDS_SESSION_REJECTED:
return false;
+ }
return true;
}
/*
+ * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
+ * then we need to retransmit that request.
+ */
+void inc_session_sequence(struct ceph_mds_session *s)
+{
+ lockdep_assert_held(&s->s_mutex);
+
+ s->s_seq++;
+
+ if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ int ret;
+
+ dout("resending session close request for mds%d\n", s->s_mds);
+ ret = request_close_session(s);
+ if (ret < 0)
+ pr_err("unable to close session to mds%d: %d\n",
+ s->s_mds, ret);
+ }
+}
+
+/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
static void schedule_delayed(struct ceph_mds_client *mdsc)
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -434,6 +434,7 @@ struct ceph_mds_client {
extern const char *ceph_mds_op_name(int op);
extern bool check_session_state(struct ceph_mds_session *s);
+void inc_session_sequence(struct ceph_mds_session *s);
extern struct ceph_mds_session *
__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -66,7 +66,7 @@ void ceph_handle_quota(struct ceph_mds_c
/* increment msg sequence number */
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
mutex_unlock(&session->s_mutex);
/* lookup inode */
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -865,7 +865,7 @@ void ceph_handle_snap(struct ceph_mds_cl
ceph_snap_op_name(op), split, trace_len);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
mutex_unlock(&session->s_mutex);
down_write(&mdsc->snap_rwsem);