From 30815f7590a1ad86ce27946a538b6c9b1169773f Mon Sep 17 00:00:00 2001
From: Yousaf Kaukab
Date: Feb 09 2023 10:37:34 +0000
Subject: Merge remote-tracking branch 'origin/users/ddiss/SLE15-SP5-GA/for-next' into SLE15-SP5-GA

Pull io_uring fixes from David Disseldorp

---

diff --git a/patches.suse/io_uring-cmpxchg-for-poll-arm-refs-release.patch b/patches.suse/io_uring-cmpxchg-for-poll-arm-refs-release.patch
new file mode 100644
index 0000000..d9da5a1
--- /dev/null
+++ b/patches.suse/io_uring-cmpxchg-for-poll-arm-refs-release.patch
@@ -0,0 +1,63 @@
+From a81f8ce78353cd921c446e84adb8519d65e222d4 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Fri, 2 Dec 2022 14:27:13 +0000
+Subject: [PATCH 3/5] io_uring: cmpxchg for poll arm refs release
+Patch-mainline: v6.1-rc7
+Git-commit: 2f3893437a4ebf2e892ca172e9e122841319d675
+References: bsc#1207511 CVE-2023-0468
+
+[ upstream commit 2f3893437a4ebf2e892ca172e9e122841319d675 ]
+
+Replace atomically subtracting the ownership reference at the end of
+arming a poll with a cmpxchg. We try to release ownership by setting 0,
+assuming that poll_refs didn't change while we were arming. If it did
+change, we keep the ownership and use it to queue a tw, which is fully
+capable of processing all events (and even tolerates spurious wake ups).
+
+It's a bit more elegant: we reduce the races between setting the
+cancellation flag and getting refs with this release, and we no longer
+have to worry about any kind of underflow. This is not the fastest path
+for polling anyway, and the performance difference between cmpxchg and
+atomic dec there is usually negligible.
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/0c95251624397ea6def568ff040cad2d7926fd51.1668963050.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+Acked-by: David Disseldorp
+---
+ fs/io_uring.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 5518a3132232b..78402694570d8 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6763,7 +6763,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
+ 				 struct io_poll_table *ipt, __poll_t mask)
+ {
+ 	struct io_ring_ctx *ctx = req->ctx;
+-	int v;
+
+ 	INIT_HLIST_NODE(&req->hash_node);
+ 	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
+@@ -6812,11 +6811,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
+ 	}
+
+ 	/*
+-	 * Release ownership. If someone tried to queue a tw while it was
+-	 * locked, kick it off for them.
++	 * Try to release ownership. If we see a change of state, e.g.
++	 * poll was waken up, queue up a tw, it'll deal with it.
+	 */
+-	v = atomic_dec_return(&req->poll_refs);
+-	if (unlikely(v & IO_POLL_REF_MASK))
++	if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
+ 		__io_poll_execute(req, 0, poll->events);
+ 	return 0;
+ }
+--
+2.35.3
+
diff --git a/patches.suse/io_uring-fix-tw-losing-poll-events.patch b/patches.suse/io_uring-fix-tw-losing-poll-events.patch
new file mode 100644
index 0000000..caef4bc
--- /dev/null
+++ b/patches.suse/io_uring-fix-tw-losing-poll-events.patch
@@ -0,0 +1,43 @@
+From a8b931be5046b24d0951ee854d94ffbdbffcf350 Mon Sep 17 00:00:00 2001
+From: David Disseldorp
+Date: Tue, 7 Feb 2023 23:48:37 +0100
+Subject: [PATCH 2/5] io_uring: fix tw losing poll events
+Patch-mainline: v6.1-rc6
+Git-commit: 539bcb57da2f58886d7d5c17134236b0ec9cd15d
+References: bsc#1207511 CVE-2023-0468
+
+We may never try to process a poll wake and its mask if there were
+multiple wake ups racing to queue up a tw. Force io_poll_check_events()
+to update the mask by vfs_poll().
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/00344d60f8b18907171178d7cf598de71d127b0b.1668710222.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Acked-by: David Disseldorp
+---
+ fs/io_uring.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index f7094c52af66b..5518a3132232b 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6509,6 +6509,13 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
+ 		return 0;
+ 	if (v & IO_POLL_CANCEL_FLAG)
+ 		return -ECANCELED;
++	/*
++	 * cqe.res contains only events of the first wake up
++	 * and all others are be lost. Redo vfs_poll() to get
++	 * up to date state.
++	 */
++	if ((v & IO_POLL_REF_MASK) != 1)
++		req->cqe.res = 0;
+
+ 	if (!req->cqe.res) {
+ 		struct poll_table_struct pt = { ._key = req->apoll_events };
+--
+2.35.3
+
diff --git a/patches.suse/io_uring-make-poll-refs-more-robust.patch b/patches.suse/io_uring-make-poll-refs-more-robust.patch
new file mode 100644
index 0000000..8d522aa
--- /dev/null
+++ b/patches.suse/io_uring-make-poll-refs-more-robust.patch
@@ -0,0 +1,101 @@
+From 925f5801bf3209e488ff4d9a271f82d492066222 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Fri, 2 Dec 2022 14:27:14 +0000
+Subject: [PATCH 4/5] io_uring: make poll refs more robust
+Patch-mainline: v6.1-rc7
+Git-commit: a26a35e9019fd70bf3cf647dcfdae87abc7bacea
+References: bsc#1207511 CVE-2023-0468
+
+[ upstream commit a26a35e9019fd70bf3cf647dcfdae87abc7bacea ]
+
+poll_refs carries two functions: the first is ownership over the
+request. The second is notifying io_poll_check_events() that there was
+an event but the wake up couldn't grab the ownership, so
+io_poll_check_events() should retry.
+
+We want to make poll_refs more robust against overflows. Instead of
+always incrementing it, which covers two purposes with one atomic,
+check if poll_refs is elevated enough and if so set a retry flag
+without attempting to grab ownership. The gap between the bias check
+and the following atomics may seem racy, but we don't need it to be
+strict. Moreover, there can be at most 4 parallel updates: by the first
+and the second poll entries, __io_arm_poll_handler() and cancellation.
+Of those four, only poll wake ups may be executed multiple times, but
+they're protected by a spinlock.
+ +Cc: stable@vger.kernel.org +Reported-by: Lin Ma +Fixes: aa43477b04025 ("io_uring: poll rework") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/c762bc31f8683b3270f3587691348a7119ef9c9d.1668963050.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +Acked-by: David Disseldorp +--- + fs/io_uring.c | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 78402694570d8..c6c782e216138 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -6386,7 +6386,29 @@ struct io_poll_table { + }; + + #define IO_POLL_CANCEL_FLAG BIT(31) +-#define IO_POLL_REF_MASK GENMASK(30, 0) ++#define IO_POLL_RETRY_FLAG BIT(30) ++#define IO_POLL_REF_MASK GENMASK(29, 0) ++ ++/* ++ * We usually have 1-2 refs taken, 128 is more than enough and we want to ++ * maximise the margin between this amount and the moment when it overflows. ++ */ ++#define IO_POLL_REF_BIAS 128 ++ ++static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) ++{ ++ int v; ++ ++ /* ++ * poll_refs are already elevated and we don't have much hope for ++ * grabbing the ownership. Instead of incrementing set a retry flag ++ * to notify the loop that there might have been some change. ++ */ ++ v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); ++ if (v & IO_POLL_REF_MASK) ++ return false; ++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); ++} + + /* + * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can +@@ -6396,6 +6418,8 @@ struct io_poll_table { + */ + static inline bool io_poll_get_ownership(struct io_kiocb *req) + { ++ if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) ++ return io_poll_get_ownership_slowpath(req); + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); + } + +@@ -6516,6 +6540,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->cqe.res = 0; ++ if (v & IO_POLL_RETRY_FLAG) { ++ req->cqe.res = 0; ++ /* ++ * We won't find new events that came in between ++ * vfs_poll and the ref put unless we clear the ++ * flag in advance. ++ */ ++ atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); ++ v &= ~IO_POLL_RETRY_FLAG; ++ } + + if (!req->cqe.res) { + struct poll_table_struct pt = { ._key = req->apoll_events }; +-- +2.35.3 + diff --git a/patches.suse/io_uring-poll-fix-poll_refs-race-with-cancelation.patch b/patches.suse/io_uring-poll-fix-poll_refs-race-with-cancelation.patch new file mode 100644 index 0000000..c17f88b --- /dev/null +++ b/patches.suse/io_uring-poll-fix-poll_refs-race-with-cancelation.patch @@ -0,0 +1,157 @@ +From 1a8117ce73709081f1a410b857764122be4939e8 Mon Sep 17 00:00:00 2001 +From: Lin Ma +Date: Fri, 2 Dec 2022 14:27:15 +0000 +Subject: [PATCH 5/5] io_uring/poll: fix poll_refs race with cancelation +Patch-mainline: v6.1-rc7 +Git-commit: 12ad3d2d6c5b0131a6052de91360849e3e154846 +References: bsc#1207511 CVE-2023-0468 + +[ upstream commit 12ad3d2d6c5b0131a6052de91360849e3e154846 ] + +There is an interesting race condition of poll_refs which could result +in a NULL pointer dereference. 
The crash trace is like:
+
+KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
+CPU: 0 PID: 30781 Comm: syz-executor.2 Not tainted 6.0.0-g493ffd6605b2 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+1.13.0-1ubuntu1.1 04/01/2014
+RIP: 0010:io_poll_remove_entry io_uring/poll.c:154 [inline]
+RIP: 0010:io_poll_remove_entries+0x171/0x5b4 io_uring/poll.c:190
+Code: ...
+RSP: 0018:ffff88810dfefba0 EFLAGS: 00010202
+RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000040000
+RDX: ffffc900030c4000 RSI: 000000000003ffff RDI: 0000000000040000
+RBP: 0000000000000008 R08: ffffffff9764d3dd R09: fffffbfff3836781
+R10: fffffbfff3836781 R11: 0000000000000000 R12: 1ffff11003422d60
+R13: ffff88801a116b04 R14: ffff88801a116ac0 R15: dffffc0000000000
+FS:  00007f9c07497700(0000) GS:ffff88811a600000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007ffb5c00ea98 CR3: 0000000105680005 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ io_apoll_task_func+0x3f/0xa0 io_uring/poll.c:299
+ handle_tw_list io_uring/io_uring.c:1037 [inline]
+ tctx_task_work+0x37e/0x4f0 io_uring/io_uring.c:1090
+ task_work_run+0x13a/0x1b0 kernel/task_work.c:177
+ get_signal+0x2402/0x25a0 kernel/signal.c:2635
+ arch_do_signal_or_restart+0x3b/0x660 arch/x86/kernel/signal.c:869
+ exit_to_user_mode_loop kernel/entry/common.c:166 [inline]
+ exit_to_user_mode_prepare+0xc2/0x160 kernel/entry/common.c:201
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:283 [inline]
+ syscall_exit_to_user_mode+0x58/0x160 kernel/entry/common.c:294
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The root cause for this is a tiny oversight in io_poll_check_events()
+when it runs concurrently with the poll cancel routine
+io_poll_cancel_req().
+
+The interleaving that triggers the use-after-free:
+
+CPU0                                      |  CPU1
+                                          |
+io_apoll_task_func()                      |  io_poll_cancel_req()
+ io_poll_check_events()                   |
+  // do while first loop                  |
+  v = atomic_read(...)                    |
+  // v = poll_refs = 1                    |
+  ...                                     |  io_poll_mark_cancelled()
+                                          |   atomic_or()
+                                          |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                          |
+  atomic_sub_return(...)                  |
+  // poll_refs = IO_POLL_CANCEL_FLAG      |
+  // loop continue                        |
+                                          |  io_poll_execute()
+                                          |   io_poll_get_ownership()
+                                          |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                          |   // gets the ownership
+  v = atomic_read(...)                    |
+  // poll_refs not change                 |
+                                          |
+  if (v & IO_POLL_CANCEL_FLAG)            |
+    return -ECANCELED;                    |
+  // io_poll_check_events return          |
+  // will go into                         |
+  // io_req_complete_failed() free req    |
+                                          |
+                                          |  io_apoll_task_func()
+                                          |  // also go into io_req_complete_failed()
+
+And the interleaving that triggers the kernel WARNING:
+
+CPU0                                      |  CPU1
+                                          |
+io_apoll_task_func()                      |  io_poll_cancel_req()
+ io_poll_check_events()                   |
+  // do while first loop                  |
+  v = atomic_read(...)                    |
+  // v = poll_refs = 1                    |
+  ...                                     |  io_poll_mark_cancelled()
+                                          |   atomic_or()
+                                          |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                          |
+  atomic_sub_return(...)                  |
+  // poll_refs = IO_POLL_CANCEL_FLAG      |
+  // loop continue                        |
+                                          |
+  v = atomic_read(...)                    |
+  // v = IO_POLL_CANCEL_FLAG              |
+                                          |  io_poll_execute()
+                                          |   io_poll_get_ownership()
+                                          |   // poll_refs = IO_POLL_CANCEL_FLAG | 1
+                                          |   // gets the ownership
+                                          |
+  WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))   |
+  // v & IO_POLL_REF_MASK = 0 WARN        |
+                                          |
+                                          |  io_apoll_task_func()
+                                          |  // also go into io_req_complete_failed()
+
+From reading the source code and communicating with Pavel, the intent
+of the atomic poll refs is to keep io_poll_check_events() looping just
+so that nothing else can grab the ownership. Therefore, this patch
+simply adds another AND operation to make sure the loop stops once it
+finds poll_refs exactly equal to IO_POLL_CANCEL_FLAG. Since
+io_poll_cancel_req() grabs ownership and will finally make its way to
+io_req_complete_failed(), the req will be reclaimed as expected.
+
+Fixes: aa43477b0402 ("io_uring: poll rework")
+Signed-off-by: Lin Ma
+Reviewed-by: Pavel Begunkov
+[axboe: tweak description and code style]
+Signed-off-by: Jens Axboe
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Greg Kroah-Hartman
+Acked-by: David Disseldorp
+---
+ fs/io_uring.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index c6c782e216138..39ecae9e7de1b 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6594,7 +6594,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
+ 	 * Release all references, retry if someone tried to restart
+ 	 * task_work while we were executing it.
+ 	 */
+-	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
++	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
++		 IO_POLL_REF_MASK);
+
+ 	return 1;
+ }
+--
+2.35.3
+
diff --git a/patches.suse/io_uring-update-res-mask-in-io_poll_check_events.patch b/patches.suse/io_uring-update-res-mask-in-io_poll_check_events.patch
new file mode 100644
index 0000000..ef1e984
--- /dev/null
+++ b/patches.suse/io_uring-update-res-mask-in-io_poll_check_events.patch
@@ -0,0 +1,43 @@
+From fd96d642879706de355acc6b850dd827fbec1985 Mon Sep 17 00:00:00 2001
+From: David Disseldorp
+Date: Tue, 7 Feb 2023 23:46:26 +0100
+Subject: [PATCH 1/5] io_uring: update res mask in io_poll_check_events
+Patch-mainline: v6.1-rc6
+Git-commit: b98186aee22fa593bc8c6b2c5d839c2ee518bc8c
+References: bsc#1207511 CVE-2023-0468
+
+When io_poll_check_events() collides with someone attempting to queue
+a task work, it'll spin one more time. However, it'll continue to use
+the mask from the first iteration instead of updating it. For example,
+if the first wake up was an EPOLLIN and the second an EPOLLOUT,
+userspace will not get EPOLLOUT in time.
+
+Clear the mask for all subsequent iterations to force vfs_poll().
+
+Cc: stable@vger.kernel.org
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/2dac97e8f691231049cb259c4ae57e79e40b537c.1668710222.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Acked-by: David Disseldorp
+---
+ fs/io_uring.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index a94d34d4abb88..f7094c52af66b 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6546,6 +6546,9 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
+ 	if (ret)
+ 		return ret;
+
++	/* force the next iteration to vfs_poll() */
++	req->cqe.res = 0;
++
+ 	/*
+ 	 * Release all references, retry if someone tried to restart
+ 	 * task_work while we were executing it.
+-- +2.35.3 + diff --git a/series.conf b/series.conf index 810767d..c9e7590 100644 --- a/series.conf +++ b/series.conf @@ -35739,6 +35739,8 @@ patches.suse/0277-block-make-blk_set_default_limits-private.patch patches.suse/0278-dm-integrity-set-dma_alignment-limit-in-io_hints.patch patches.suse/0279-dm-log-writes-set-dma_alignment-limit-in-io_hints.patch + patches.suse/io_uring-update-res-mask-in-io_poll_check_events.patch + patches.suse/io_uring-fix-tw-losing-poll-events.patch patches.suse/Input-synaptics-switch-touchpad-on-HP-Laptop-15-da30.patch patches.suse/Input-goodix-try-resetting-the-controller-when-no-co.patch patches.suse/Input-iforce-invert-valid-length-check-when-fetching.patch @@ -35868,6 +35870,9 @@ patches.suse/regulator-slg51000-Wait-after-asserting-CS-pin.patch patches.suse/regulator-twl6030-re-add-TWL6032_SUBCLASS.patch patches.suse/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch + patches.suse/io_uring-cmpxchg-for-poll-arm-refs-release.patch + patches.suse/io_uring-make-poll-refs-more-robust.patch + patches.suse/io_uring-poll-fix-poll_refs-race-with-cancelation.patch patches.suse/s390-dasd-fix-possible-buffer-overflow-in-copy_pair_show patches.suse/init-Kconfig-fix-CC_HAS_ASM_GOTO_TIED_OUTPUT-test-wi.patch patches.suse/cifs-Use-after-free-in-debug-code.patch
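
The cmpxchg hand-off in patch 3/5 above is easier to see outside the
kernel. The following is a minimal standalone C11 model of it;
fake_req, queue_tw(), release_ownership() and the main() harness are
invented stand-ins for io_kiocb, __io_poll_execute() and the arming
path, not kernel API.

/*
 * Minimal userspace model of the cmpxchg ownership release. All names
 * here are illustrative stand-ins, not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>

struct fake_req {
	atomic_int poll_refs;	/* low bits: refs; high bits: flags */
};

/* Stand-in for __io_poll_execute(): defer the work to task_work. */
static void queue_tw(struct fake_req *req)
{
	printf("state changed while arming, queueing tw\n");
}

/*
 * The arming path holds exactly one ref. If nothing raced with us,
 * poll_refs is still 1 and the cmpxchg drops it to 0. If a wake up or
 * cancelation changed the state, the cmpxchg fails, we keep ownership
 * and hand the events off to task_work. Unlike a plain decrement,
 * this can never underflow the counter.
 */
static void release_ownership(struct fake_req *req)
{
	int expected = 1;

	if (!atomic_compare_exchange_strong(&req->poll_refs, &expected, 0))
		queue_tw(req);
}

int main(void)
{
	struct fake_req req;

	atomic_init(&req.poll_refs, 1);
	release_ownership(&req);	/* uncontended: 1 -> 0, no tw */

	atomic_init(&req.poll_refs, 2);	/* simulate a racing wake up */
	release_ownership(&req);	/* contended: tw queued */
	return 0;
}

In the uncontended run the cmpxchg alone retires the ref; only the
raced run queues task_work, which is exactly the window the old
atomic_dec_return() had to handle with an underflow-prone decrement.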
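
Patch 4/5 splits ownership acquisition into a fast path and an
overflow-resistant slow path. A standalone sketch of the same scheme
follows; the flag and bias constants mirror the patch, while the global
poll_refs, the function names and main() are an invented harness.

/*
 * Minimal userspace model of the biased ownership grab.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define IO_POLL_CANCEL_FLAG	(1u << 31)
#define IO_POLL_RETRY_FLAG	(1u << 30)
#define IO_POLL_REF_MASK	((1u << 30) - 1)

/*
 * We usually have 1-2 refs taken; 128 leaves a wide margin before the
 * 30-bit ref field could ever overflow.
 */
#define IO_POLL_REF_BIAS	128

static atomic_uint poll_refs;

/*
 * Refs are suspiciously high, so don't push them further. Set the
 * retry flag instead; the current owner will notice it and re-poll.
 * Ownership is only taken here if the count dropped to zero between
 * the caller's bias check and the fetch_or.
 */
static bool get_ownership_slowpath(void)
{
	unsigned int v = atomic_fetch_or(&poll_refs, IO_POLL_RETRY_FLAG);

	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_add(&poll_refs, 1) & IO_POLL_REF_MASK);
}

/*
 * Fast path: a plain increment; the caller owns the request iff the
 * previous ref count was zero. The bias check bounds how far racing
 * wake ups can ever push the counter.
 */
static bool get_ownership(void)
{
	/* signed compare mirrors the kernel's atomic_read(): a set
	 * cancel flag makes the value negative, keeping cancelation
	 * on the fast path */
	if ((int)atomic_load(&poll_refs) >= IO_POLL_REF_BIAS)
		return get_ownership_slowpath();
	return !(atomic_fetch_add(&poll_refs, 1) & IO_POLL_REF_MASK);
}

int main(void)
{
	printf("first caller owns: %d\n", get_ownership());	/* 1 */
	printf("second caller owns: %d\n", get_ownership());	/* 0 */
	return 0;
}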
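
Taken together, patches 1/5, 2/5 and 5/5 shape the
io_poll_check_events() loop: clear cqe.res between iterations, re-poll
when extra wake ups raced in, and mask the atomic_sub_return() result
so a leftover cancel flag cannot keep the loop spinning. Below is a
reduced model of that loop, again assuming C11 atomics; check_events(),
do_vfs_poll() and cqe_res are invented stand-ins for
io_poll_check_events(), vfs_poll() and req->cqe.res, and posting the
completion is elided.

/*
 * Reduced model of the io_poll_check_events() loop after the series.
 */
#include <stdatomic.h>
#include <stdio.h>

#define IO_POLL_CANCEL_FLAG	(1u << 31)
#define IO_POLL_RETRY_FLAG	(1u << 30)
#define IO_POLL_REF_MASK	((1u << 30) - 1)

static atomic_uint poll_refs;
static unsigned int cqe_res;	/* models req->cqe.res */

/* Invented stand-in for vfs_poll(). */
static unsigned int do_vfs_poll(void)
{
	return 0x1;	/* pretend EPOLLIN is always pending */
}

static int check_events(void)
{
	unsigned int v, refs;

	do {
		v = atomic_load(&poll_refs);
		if (!(v & IO_POLL_REF_MASK))	/* WARN_ON_ONCE upstream */
			return -1;
		if (v & IO_POLL_CANCEL_FLAG)
			return -2;		/* -ECANCELED */
		/*
		 * More than one wake up raced in: cqe_res only holds
		 * the first mask, so force a re-poll (patch 2/5).
		 */
		if ((v & IO_POLL_REF_MASK) != 1)
			cqe_res = 0;
		/* A blocked waker asked us to retry (patch 4/5). */
		if (v & IO_POLL_RETRY_FLAG) {
			cqe_res = 0;
			atomic_fetch_and(&poll_refs, ~IO_POLL_RETRY_FLAG);
			v &= ~IO_POLL_RETRY_FLAG;
		}
		if (!cqe_res)
			cqe_res = do_vfs_poll();

		/* ... post cqe_res to the completion queue here ... */
		cqe_res = 0;	/* force vfs_poll() next time (patch 1/5) */

		/*
		 * Drop the refs we consumed, but keep looping only while
		 * real references remain: masking with IO_POLL_REF_MASK
		 * stops the loop when only the cancel flag is left
		 * (patch 5/5).
		 */
		refs = v & IO_POLL_REF_MASK;
	} while ((atomic_fetch_sub(&poll_refs, refs) - refs) &
		 IO_POLL_REF_MASK);

	return 1;
}

int main(void)
{
	atomic_store(&poll_refs, 1);	/* task_work runs holding one ref */
	printf("check_events() -> %d\n", check_events());
	return 0;
}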