Blob Blame History Raw
From 25c4081f79f60623f84cc96534dcfd674671b37c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 10 Nov 2021 09:55:27 +0100
Subject: drm/i915/ttm: Fix illegal addition to shrinker list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: d3cb30f8dcbcb797bb4dd62dc8e85dd2334c3475
Patch-mainline: v5.17-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225

There's a small window of opportunity during which the adjust_lru()
function can be called with a GEM refcount of zero from the TTM
eviction code. This results in a kernel BUG().

Ensure that we don't attempt to modify the GEM shrinker lists unless
we have a GEM refcount.

Fixes: ebd4a8ec7799 ("drm/i915/ttm: move shrinker management into adjust_lru")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110085527.1033475-1-thomas.hellstrom@linux.intel.com
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 31 +++++++++++++++++--------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 6369fb9b2455..d08a270b0921 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -771,18 +771,29 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 	 *
 	 * TODO: consider maybe also bumping the shrinker list here when we have
 	 * already unpinned it, which should give us something more like an LRU.
+	 *
+	 * TODO: There is a small window of opportunity for this function to
+	 * get called from eviction after we've dropped the last GEM refcount,
+	 * but before the TTM deleted flag is set on the object. Avoid
+	 * adjusting the shrinker list in such cases, since the object is
+	 * not available to the shrinker anyway due to its zero refcount.
+	 * To fix this properly we should move to a TTM shrinker LRU list for
+	 * these objects.
 	 */
-	if (shrinkable != obj->mm.ttm_shrinkable) {
-		if (shrinkable) {
-			if (obj->mm.madv == I915_MADV_WILLNEED)
-				__i915_gem_object_make_shrinkable(obj);
-			else
-				__i915_gem_object_make_purgeable(obj);
-		} else {
-			i915_gem_object_make_unshrinkable(obj);
+	if (kref_get_unless_zero(&obj->base.refcount)) {
+		if (shrinkable != obj->mm.ttm_shrinkable) {
+			if (shrinkable) {
+				if (obj->mm.madv == I915_MADV_WILLNEED)
+					__i915_gem_object_make_shrinkable(obj);
+				else
+					__i915_gem_object_make_purgeable(obj);
+			} else {
+				i915_gem_object_make_unshrinkable(obj);
+			}
+
+			obj->mm.ttm_shrinkable = shrinkable;
 		}
-
-		obj->mm.ttm_shrinkable = shrinkable;
+		i915_gem_object_put(obj);
 	}
 
 	/*
-- 
2.38.1