|
Luís Henriques |
8e1cd3 |
From: =?utf-8?q?Lu=C3=ADs_Henriques_=3Clhenriques=40suse=2Ede=3E?=
|
|
Luís Henriques |
8e1cd3 |
Date: Fri, 3 Jun 2022 14:29:09 +0100
|
|
Luís Henriques |
8e1cd3 |
Subject: ceph: prevent a client from exceeding the MDS maximum xattr size
|
|
Luís Henriques |
8e1cd3 |
MIME-Version: 1.0
|
|
Luís Henriques |
8e1cd3 |
Content-Type: text/plain; charset=UTF-8
|
|
Luís Henriques |
8e1cd3 |
Content-Transfer-Encoding: 8bit
|
|
Luís Henriques |
8e1cd3 |
Git-commit: d93231a6bc8a452323d5fef16cca7107ce483a27
|
|
Luís Henriques |
8e1cd3 |
Patch-mainline: v6.0-rc1
|
|
Luís Henriques |
8e1cd3 |
References: jsc#SES-1880
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
The MDS tries to enforce a limit on the total size of key/values in extended
|
|
Luís Henriques |
8e1cd3 |
attributes. However, this limit is enforced only if doing a synchronous
|
|
Luís Henriques |
8e1cd3 |
operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
|
|
Luís Henriques |
8e1cd3 |
doesn't have a chance to enforce these limits.
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
This patch adds support for decoding the xattrs maximum size setting that is
|
|
Luís Henriques |
8e1cd3 |
distributed in the mdsmap. Then, when setting an xattr, the kernel client
|
|
Luís Henriques |
8e1cd3 |
will revert to doing a synchronous operation if that maximum size is exceeded.
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
While there, fix a dout() that would trigger a printk warning:
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
[ 98.718078] ------------[ cut here ]------------
|
|
Luís Henriques |
8e1cd3 |
[ 98.719012] precision 65536 too large
|
|
Luís Henriques |
8e1cd3 |
[ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
|
|
Luís Henriques |
8e1cd3 |
...
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
Link: https://tracker.ceph.com/issues/55725
|
|
Luís Henriques |
8e1cd3 |
Signed-off-by: Luís Henriques <lhenriques@suse.de>
|
|
Luís Henriques |
8e1cd3 |
Reviewed-by: Xiubo Li <xiubli@redhat.com>
|
|
Luís Henriques |
8e1cd3 |
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
|
|
Luís Henriques |
8e1cd3 |
Acked-by: Luis Henriques <lhenriques@suse.com>
|
|
Luís Henriques |
8e1cd3 |
---
|
|
Luís Henriques |
8e1cd3 |
fs/ceph/mdsmap.c | 22 ++++++++++++++++++----
|
|
Luís Henriques |
8e1cd3 |
fs/ceph/xattr.c | 12 ++++++++----
|
|
Luís Henriques |
8e1cd3 |
include/linux/ceph/mdsmap.h | 1 +
|
|
Luís Henriques |
8e1cd3 |
3 files changed, 27 insertions(+), 8 deletions(-)
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
|
|
Luís Henriques |
8e1cd3 |
index 30387733765d..8d0a6d2c2da4 100644
|
|
Luís Henriques |
8e1cd3 |
--- a/fs/ceph/mdsmap.c
|
|
Luís Henriques |
8e1cd3 |
+++ b/fs/ceph/mdsmap.c
|
|
Luís Henriques |
8e1cd3 |
@@ -352,12 +352,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
|
|
Luís Henriques |
8e1cd3 |
__decode_and_drop_type(p, end, u8, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
}
|
|
Luís Henriques |
8e1cd3 |
if (mdsmap_ev >= 8) {
|
|
Luís Henriques |
8e1cd3 |
- u32 name_len;
|
|
Luís Henriques |
8e1cd3 |
/* enabled */
|
|
Luís Henriques |
8e1cd3 |
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
- ceph_decode_32_safe(p, end, name_len, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
- ceph_decode_need(p, end, name_len, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
- *p += name_len;
|
|
Luís Henriques |
8e1cd3 |
+ /* fs_name */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_string(p, end, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
}
|
|
Luís Henriques |
8e1cd3 |
/* damaged */
|
|
Luís Henriques |
8e1cd3 |
if (mdsmap_ev >= 9) {
|
|
Luís Henriques |
8e1cd3 |
@@ -370,6 +368,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
|
|
Luís Henriques |
8e1cd3 |
} else {
|
|
Luís Henriques |
8e1cd3 |
m->m_damaged = false;
|
|
Luís Henriques |
8e1cd3 |
}
|
|
Luís Henriques |
8e1cd3 |
+ if (mdsmap_ev >= 17) {
|
|
Luís Henriques |
8e1cd3 |
+ /* balancer */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_string(p, end, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ /* standby_count_wanted */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_32(p, end, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ /* old_max_mds */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_32(p, end, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ /* min_compat_client */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_8(p, end, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ /* required_client_features */
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_skip_set(p, end, 64, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
|
|
Luís Henriques |
8e1cd3 |
+ } else {
|
|
Luís Henriques |
8e1cd3 |
+ /* This forces the usage of the (sync) SETXATTR Op */
|
|
Luís Henriques |
8e1cd3 |
+ m->m_max_xattr_size = 0;
|
|
Luís Henriques |
8e1cd3 |
+ }
|
|
Luís Henriques |
8e1cd3 |
bad_ext:
|
|
Luís Henriques |
8e1cd3 |
dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
|
|
Luís Henriques |
8e1cd3 |
!!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
|
|
Luís Henriques |
8e1cd3 |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
|
|
Luís Henriques |
8e1cd3 |
index f141f5246163..f31350cda960 100644
|
|
Luís Henriques |
8e1cd3 |
--- a/fs/ceph/xattr.c
|
|
Luís Henriques |
8e1cd3 |
+++ b/fs/ceph/xattr.c
|
|
Luís Henriques |
8e1cd3 |
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
|
|
Luís Henriques |
8e1cd3 |
flags |= CEPH_XATTR_REMOVE;
|
|
Luís Henriques |
8e1cd3 |
}
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
- dout("setxattr value=%.*s\n", (int)size, value);
|
|
Luís Henriques |
8e1cd3 |
+ dout("setxattr value size: %zu\n", size);
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
/* do request */
|
|
Luís Henriques |
8e1cd3 |
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
|
|
Luís Henriques |
8e1cd3 |
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
|
|
Luís Henriques |
8e1cd3 |
spin_lock(&ci->i_ceph_lock);
|
|
Luís Henriques |
8e1cd3 |
retry:
|
|
Luís Henriques |
8e1cd3 |
issued = __ceph_caps_issued(ci, NULL);
|
|
Luís Henriques |
8e1cd3 |
- if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
|
|
Luís Henriques |
8e1cd3 |
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
|
|
Luís Henriques |
8e1cd3 |
+ if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
|
|
Luís Henriques |
8e1cd3 |
+ (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) {
|
|
Luís Henriques |
8e1cd3 |
+ dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
|
|
Luís Henriques |
8e1cd3 |
+ __func__, ci->i_xattrs.version, required_blob_size,
|
|
Luís Henriques |
8e1cd3 |
+ mdsc->mdsmap->m_max_xattr_size);
|
|
Luís Henriques |
8e1cd3 |
goto do_sync;
|
|
Luís Henriques |
8e1cd3 |
+ }
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
if (!lock_snap_rwsem && !ci->i_head_snapc) {
|
|
Luís Henriques |
8e1cd3 |
lock_snap_rwsem = true;
|
|
Luís Henriques |
8e1cd3 |
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
|
|
Luís Henriques |
8e1cd3 |
ceph_cap_string(issued));
|
|
Luís Henriques |
8e1cd3 |
__build_xattrs(inode);
|
|
Luís Henriques |
8e1cd3 |
|
|
Luís Henriques |
8e1cd3 |
- required_blob_size = __get_required_blob_size(ci, name_len, val_len);
|
|
Luís Henriques |
8e1cd3 |
-
|
|
Luís Henriques |
8e1cd3 |
if (!ci->i_xattrs.prealloc_blob ||
|
|
Luís Henriques |
8e1cd3 |
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
|
|
Luís Henriques |
8e1cd3 |
struct ceph_buffer *blob;
|
|
Luís Henriques |
8e1cd3 |
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
|
|
Luís Henriques |
8e1cd3 |
index 523fd0452856..4c3e0648dc27 100644
|
|
Luís Henriques |
8e1cd3 |
--- a/include/linux/ceph/mdsmap.h
|
|
Luís Henriques |
8e1cd3 |
+++ b/include/linux/ceph/mdsmap.h
|
|
Luís Henriques |
8e1cd3 |
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
|
|
Luís Henriques |
8e1cd3 |
u32 m_session_timeout; /* seconds */
|
|
Luís Henriques |
8e1cd3 |
u32 m_session_autoclose; /* seconds */
|
|
Luís Henriques |
8e1cd3 |
u64 m_max_file_size;
|
|
Luís Henriques |
8e1cd3 |
+ u64 m_max_xattr_size; /* maximum size for xattrs blob */
|
|
Luís Henriques |
8e1cd3 |
u32 m_max_mds; /* expected up:active mds number */
|
|
Luís Henriques |
8e1cd3 |
u32 m_num_active_mds; /* actual up:active mds number */
|
|
Luís Henriques |
8e1cd3 |
u32 possible_max_rank; /* possible max rank index */
|
|
Luís Henriques |
8e1cd3 |
|