Michal Koutný 45f830
From: =?utf-8?b?TWljaGFsIEtvdXRuw70gPG1rb3V0bnlAc3VzZS5jb20+?=
Michal Koutný 45f830
Date: Mon, 10 Oct 2022 10:29:18 +0200
Michal Koutný 45f830
Subject: cgroup: Reorganize css_set_lock and kernfs path processing
Michal Koutný 45f830
MIME-Version: 1.0
Michal Koutný 45f830
Content-Type: text/plain; charset=UTF-8
Michal Koutný 45f830
Content-Transfer-Encoding: 8bit
Michal Koutný 45f830
Git-commit: 46307fd6e27a3f678a1678b02e667678c22aa8cc
Michal Koutný 45f830
Patch-mainline: v6.1-rc2
Michal Koutný 45f830
References: bsc#1205650
Michal Koutný 45f830
Michal Koutný 45f830
Commit 74e4b956eb1c incorrectly wrapped kernfs_walk_and_get
Michal Koutný 45f830
(might_sleep) under css_set_lock (spinlock). css_set_lock is needed by
Michal Koutný 45f830
__cset_cgroup_from_root to ensure stable cset->cgrp_links but not for
Michal Koutný 45f830
kernfs_walk_and_get.
Michal Koutný 45f830
Michal Koutný 45f830
We only need to make sure that the returned root_cgrp won't be freed
Michal Koutný 45f830
under us. This is given in the case of global root because it is static
Michal Koutný 45f830
(cgrp_dfl_root.cgrp). When the root_cgrp is lower in the hierarchy, it
Michal Koutný 45f830
is pinned by cgroup_ns->root_cset (and `current` task cannot switch
Michal Koutný 45f830
namespace asynchronously, so nsproxy pins cgroup_ns).
Michal Koutný 45f830
Michal Koutný 45f830
Note this reasoning won't hold for root cgroups in v1 hierarchies,
Michal Koutný 45f830
therefore create a special-cased helper function just for the default
Michal Koutný 45f830
hierarchy.
Michal Koutný 45f830
Michal Koutný 45f830
Fixes: 74e4b956eb1c ("cgroup: Honor caller's cgroup NS when resolving path")
Michal Koutný 45f830
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Michal Koutný 45f830
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Michal Koutný 45f830
Signed-off-by: Tejun Heo <tj@kernel.org>
Michal Koutný 45f830
---
Michal Koutný 45f830
 kernel/cgroup/cgroup.c | 40 +++++++++++++++++++++++++++-------------
Michal Koutný 45f830
 1 file changed, 27 insertions(+), 13 deletions(-)
Michal Koutný 45f830
Michal Koutný 45f830
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
Michal Koutný 45f830
index 764bdd5fd8d1..ecf409e3c3a7 100644
Michal Koutný 45f830
--- a/kernel/cgroup/cgroup.c
Michal Koutný 45f830
+++ b/kernel/cgroup/cgroup.c
Michal Koutný 45f830
@@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
Michal Koutný 45f830
 	cgroup_free_root(root);
Michal Koutný 45f830
 }
Michal Koutný 45f830
 
Michal Koutný 45f830
+/*
Michal Koutný 45f830
+ * Returned cgroup is without refcount but it's valid as long as cset pins it.
Michal Koutný 45f830
+ */
Michal Koutný 45f830
 static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
Michal Koutný 45f830
 					    struct cgroup_root *root)
Michal Koutný 45f830
 {
Michal Koutný 45f830
@@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
Michal Koutný 45f830
 		res_cgroup = cset->dfl_cgrp;
Michal Koutný 45f830
 	} else {
Michal Koutný 45f830
 		struct cgrp_cset_link *link;
Michal Koutný 45f830
+		lockdep_assert_held(&css_set_lock);
Michal Koutný 45f830
 
Michal Koutný 45f830
 		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
Michal Koutný 45f830
 			struct cgroup *c = link->cgrp;
Michal Koutný 45f830
@@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
Michal Koutný 45f830
 		}
Michal Koutný 45f830
 	}
Michal Koutný 45f830
 
Michal Koutný 45f830
+	BUG_ON(!res_cgroup);
Michal Koutný 45f830
 	return res_cgroup;
Michal Koutný 45f830
 }
Michal Koutný 45f830
 
Michal Koutný 45f830
@@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
Michal Koutný 45f830
 
Michal Koutný 45f830
 	rcu_read_unlock();
Michal Koutný 45f830
 
Michal Koutný 45f830
-	BUG_ON(!res);
Michal Koutný 45f830
 	return res;
Michal Koutný 45f830
 }
Michal Koutný 45f830
 
Michal Koutný 45f830
+/*
Michal Koutný 45f830
+ * Look up cgroup associated with current task's cgroup namespace on the default
Michal Koutný 45f830
+ * hierarchy.
Michal Koutný 45f830
+ *
Michal Koutný 45f830
+ * Unlike current_cgns_cgroup_from_root(), this doesn't need locks:
Michal Koutný 45f830
+ * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu
Michal Koutný 45f830
+ *   pointers.
Michal Koutný 45f830
+ * - css_set_lock is not needed because we just read cset->dfl_cgrp.
Michal Koutný 45f830
+ * - As a bonus returned cgrp is pinned with the current because it cannot
Michal Koutný 45f830
+ *   switch cgroup_ns asynchronously.
Michal Koutný 45f830
+ */
Michal Koutný 45f830
+static struct cgroup *current_cgns_cgroup_dfl(void)
Michal Koutný 45f830
+{
Michal Koutný 45f830
+	struct css_set *cset;
Michal Koutný 45f830
+
Michal Koutný 45f830
+	cset = current->nsproxy->cgroup_ns->root_cset;
Michal Koutný 45f830
+	return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
Michal Koutný 45f830
+}
Michal Koutný 45f830
+
Michal Koutný 45f830
 /* look up cgroup associated with given css_set on the specified hierarchy */
Michal Koutný 45f830
 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
Michal Koutný 45f830
 					    struct cgroup_root *root)
Michal Koutný 45f830
 {
Michal Koutný 45f830
-	struct cgroup *res = NULL;
Michal Koutný 45f830
-
Michal Koutný 45f830
 	lockdep_assert_held(&cgroup_mutex);
Michal Koutný 45f830
 	lockdep_assert_held(&css_set_lock);
Michal Koutný 45f830
 
Michal Koutný 45f830
-	res = __cset_cgroup_from_root(cset, root);
Michal Koutný 45f830
-
Michal Koutný 45f830
-	BUG_ON(!res);
Michal Koutný 45f830
-	return res;
Michal Koutný 45f830
+	return __cset_cgroup_from_root(cset, root);
Michal Koutný 45f830
 }
Michal Koutný 45f830
 
Michal Koutný 45f830
 /*
Michal Koutný 45f830
@@ -6105,9 +6123,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
Michal Koutný 45f830
 	if (!cgrp)
Michal Koutný 45f830
 		return ERR_PTR(-ENOENT);
Michal Koutný 45f830
 
Michal Koutný 45f830
-	spin_lock_irq(&css_set_lock);
Michal Koutný 45f830
-	root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
Michal Koutný 45f830
-	spin_unlock_irq(&css_set_lock);
Michal Koutný 45f830
+	root_cgrp = current_cgns_cgroup_dfl();
Michal Koutný 45f830
 	if (!cgroup_is_descendant(cgrp, root_cgrp)) {
Michal Koutný 45f830
 		cgroup_put(cgrp);
Michal Koutný 45f830
 		return ERR_PTR(-ENOENT);
Michal Koutný 45f830
@@ -6686,10 +6702,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
Michal Koutný 45f830
 	struct cgroup *cgrp = ERR_PTR(-ENOENT);
Michal Koutný 45f830
 	struct cgroup *root_cgrp;
Michal Koutný 45f830
 
Michal Koutný 45f830
-	spin_lock_irq(&css_set_lock);
Michal Koutný 45f830
-	root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
Michal Koutný 45f830
+	root_cgrp = current_cgns_cgroup_dfl();
Michal Koutný 45f830
 	kn = kernfs_walk_and_get(root_cgrp->kn, path);
Michal Koutný 45f830
-	spin_unlock_irq(&css_set_lock);
Michal Koutný 45f830
 	if (!kn)
Michal Koutný 45f830
 		goto out;
Michal Koutný 45f830
 
Michal Koutný 45f830