Luís Henriques 8e1cd3
From: =?utf-8?q?Lu=C3=ADs_Henriques_=3Clhenriques=40suse=2Ede=3E?=
Luís Henriques 8e1cd3
Date: Fri, 3 Jun 2022 14:29:09 +0100
Luís Henriques 8e1cd3
Subject: ceph: prevent a client from exceeding the MDS maximum xattr size
Luís Henriques 8e1cd3
MIME-Version: 1.0
Luís Henriques 8e1cd3
Content-Type: text/plain; charset=UTF-8
Luís Henriques 8e1cd3
Content-Transfer-Encoding: 8bit
Luís Henriques 8e1cd3
Git-commit: d93231a6bc8a452323d5fef16cca7107ce483a27
Luís Henriques 8e1cd3
Patch-mainline: v6.0-rc1
Luís Henriques 8e1cd3
References: jsc#SES-1880
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
The MDS tries to enforce a limit on the total size of key/values in extended
Luís Henriques 8e1cd3
attributes.  However, this limit is enforced only if doing a synchronous
Luís Henriques 8e1cd3
operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
Luís Henriques 8e1cd3
doesn't have a chance to enforce these limits.
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
This patch adds support for decoding the xattrs maximum size setting that is
Luís Henriques 8e1cd3
distributed in the mdsmap.  Then, when setting an xattr, the kernel client
Luís Henriques 8e1cd3
will revert to doing a synchronous operation if that maximum size is exceeded.
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
While there, fix a dout() that would trigger a printk warning:
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
[   98.718078] ------------[ cut here ]------------
Luís Henriques 8e1cd3
[   98.719012] precision 65536 too large
Luís Henriques 8e1cd3
[   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
Luís Henriques 8e1cd3
...
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
Link: https://tracker.ceph.com/issues/55725
Luís Henriques 8e1cd3
Signed-off-by: Luís Henriques <lhenriques@suse.de>
Luís Henriques 8e1cd3
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Luís Henriques 8e1cd3
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Luís Henriques 8e1cd3
Acked-by: Luis Henriques <lhenriques@suse.com>
Luís Henriques 8e1cd3
---
Luís Henriques 8e1cd3
 fs/ceph/mdsmap.c            | 22 ++++++++++++++++++----
Luís Henriques 8e1cd3
 fs/ceph/xattr.c             | 12 ++++++++----
Luís Henriques 8e1cd3
 include/linux/ceph/mdsmap.h |  1 +
Luís Henriques 8e1cd3
 3 files changed, 27 insertions(+), 8 deletions(-)
Luís Henriques 8e1cd3
Luís Henriques 8e1cd3
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
Luís Henriques 8e1cd3
index 30387733765d..8d0a6d2c2da4 100644
Luís Henriques 8e1cd3
--- a/fs/ceph/mdsmap.c
Luís Henriques 8e1cd3
+++ b/fs/ceph/mdsmap.c
Luís Henriques 8e1cd3
@@ -352,12 +352,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
Luís Henriques 8e1cd3
 		__decode_and_drop_type(p, end, u8, bad_ext);
Luís Henriques 8e1cd3
 	}
Luís Henriques 8e1cd3
 	if (mdsmap_ev >= 8) {
Luís Henriques 8e1cd3
-		u32 name_len;
Luís Henriques 8e1cd3
 		/* enabled */
Luís Henriques 8e1cd3
 		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
Luís Henriques 8e1cd3
-		ceph_decode_32_safe(p, end, name_len, bad_ext);
Luís Henriques 8e1cd3
-		ceph_decode_need(p, end, name_len, bad_ext);
Luís Henriques 8e1cd3
-		*p += name_len;
Luís Henriques 8e1cd3
+		/* fs_name */
Luís Henriques 8e1cd3
+		ceph_decode_skip_string(p, end, bad_ext);
Luís Henriques 8e1cd3
 	}
Luís Henriques 8e1cd3
 	/* damaged */
Luís Henriques 8e1cd3
 	if (mdsmap_ev >= 9) {
Luís Henriques 8e1cd3
@@ -370,6 +368,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
Luís Henriques 8e1cd3
 	} else {
Luís Henriques 8e1cd3
 		m->m_damaged = false;
Luís Henriques 8e1cd3
 	}
Luís Henriques 8e1cd3
+	if (mdsmap_ev >= 17) {
Luís Henriques 8e1cd3
+		/* balancer */
Luís Henriques 8e1cd3
+		ceph_decode_skip_string(p, end, bad_ext);
Luís Henriques 8e1cd3
+		/* standby_count_wanted */
Luís Henriques 8e1cd3
+		ceph_decode_skip_32(p, end, bad_ext);
Luís Henriques 8e1cd3
+		/* old_max_mds */
Luís Henriques 8e1cd3
+		ceph_decode_skip_32(p, end, bad_ext);
Luís Henriques 8e1cd3
+		/* min_compat_client */
Luís Henriques 8e1cd3
+		ceph_decode_skip_8(p, end, bad_ext);
Luís Henriques 8e1cd3
+		/* required_client_features */
Luís Henriques 8e1cd3
+		ceph_decode_skip_set(p, end, 64, bad_ext);
Luís Henriques 8e1cd3
+		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
Luís Henriques 8e1cd3
+	} else {
Luís Henriques 8e1cd3
+		/* This forces the usage of the (sync) SETXATTR Op */
Luís Henriques 8e1cd3
+		m->m_max_xattr_size = 0;
Luís Henriques 8e1cd3
+	}
Luís Henriques 8e1cd3
 bad_ext:
Luís Henriques 8e1cd3
 	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
Luís Henriques 8e1cd3
 	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
Luís Henriques 8e1cd3
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
Luís Henriques 8e1cd3
index f141f5246163..f31350cda960 100644
Luís Henriques 8e1cd3
--- a/fs/ceph/xattr.c
Luís Henriques 8e1cd3
+++ b/fs/ceph/xattr.c
Luís Henriques 8e1cd3
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
Luís Henriques 8e1cd3
 			flags |= CEPH_XATTR_REMOVE;
Luís Henriques 8e1cd3
 	}
Luís Henriques 8e1cd3
 
Luís Henriques 8e1cd3
-	dout("setxattr value=%.*s\n", (int)size, value);
Luís Henriques 8e1cd3
+	dout("setxattr value size: %zu\n", size);
Luís Henriques 8e1cd3
 
Luís Henriques 8e1cd3
 	/* do request */
Luís Henriques 8e1cd3
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
Luís Henriques 8e1cd3
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
Luís Henriques 8e1cd3
 	spin_lock(&ci->i_ceph_lock);
Luís Henriques 8e1cd3
 retry:
Luís Henriques 8e1cd3
 	issued = __ceph_caps_issued(ci, NULL);
Luís Henriques 8e1cd3
-	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
Luís Henriques 8e1cd3
+	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
Luís Henriques 8e1cd3
+	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
Luís Henriques 8e1cd3
+	    (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) {
Luís Henriques 8e1cd3
+		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
Luís Henriques 8e1cd3
+		     __func__, ci->i_xattrs.version, required_blob_size,
Luís Henriques 8e1cd3
+		     mdsc->mdsmap->m_max_xattr_size);
Luís Henriques 8e1cd3
 		goto do_sync;
Luís Henriques 8e1cd3
+	}
Luís Henriques 8e1cd3
 
Luís Henriques 8e1cd3
 	if (!lock_snap_rwsem && !ci->i_head_snapc) {
Luís Henriques 8e1cd3
 		lock_snap_rwsem = true;
Luís Henriques 8e1cd3
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
Luís Henriques 8e1cd3
 	     ceph_cap_string(issued));
Luís Henriques 8e1cd3
 	__build_xattrs(inode);
Luís Henriques 8e1cd3
 
Luís Henriques 8e1cd3
-	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
Luís Henriques 8e1cd3
-
Luís Henriques 8e1cd3
 	if (!ci->i_xattrs.prealloc_blob ||
Luís Henriques 8e1cd3
 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
Luís Henriques 8e1cd3
 		struct ceph_buffer *blob;
Luís Henriques 8e1cd3
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
Luís Henriques 8e1cd3
index 523fd0452856..4c3e0648dc27 100644
Luís Henriques 8e1cd3
--- a/include/linux/ceph/mdsmap.h
Luís Henriques 8e1cd3
+++ b/include/linux/ceph/mdsmap.h
Luís Henriques 8e1cd3
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
Luís Henriques 8e1cd3
 	u32 m_session_timeout;          /* seconds */
Luís Henriques 8e1cd3
 	u32 m_session_autoclose;        /* seconds */
Luís Henriques 8e1cd3
 	u64 m_max_file_size;
Luís Henriques 8e1cd3
+	u64 m_max_xattr_size;		/* maximum size for xattrs blob */
Luís Henriques 8e1cd3
 	u32 m_max_mds;			/* expected up:active mds number */
Luís Henriques 8e1cd3
 	u32 m_num_active_mds;		/* actual up:active mds number */
Luís Henriques 8e1cd3
 	u32 possible_max_rank;		/* possible max rank index */
Luís Henriques 8e1cd3