diff --git a/README b/README index 1c91eca..4150bc8 100644 --- a/README +++ b/README @@ -79,10 +79,9 @@ what the patch does, who wrote it, and who inside SUSE/Novell we'll log makes the most sense in most cases. * Unless the author identified in the From: tag has a @suse.de, - @suse.com, @suse.cz, or @novell.com address, the patch must include - a Signed-off-by:, Acked-by: or Reviewed-by: header which identifies the - person in one of these domains who feels responsible for the patch - inside the company. + @suse.com or @suse.cz address, the patch must include a Signed-off-by:, + Acked-by: or Reviewed-by: header which identifies the person in one of + these domains who feels responsible for the patch inside the company. * The patch must include a Patch-mainline: tag that identifies where the patch came from (for backports from mainline), or when it is @@ -105,10 +104,11 @@ what the patch does, who wrote it, and who inside SUSE/Novell we'll Patch-mainline: Never, - * The patch should include a References: tag that identifies the - Bugzilla bug number, FATE entry, etc. where the patch is discussed. - Please prefix bugzilla.novell.com bug numbers with bnc# and fate - feature numbers with fate#. Have a look at + * The patch should include a References: tag that identifies the Bugzilla bug + number, JIRA epic, etc. where the patch is discussed. Please prefix + bugzilla.suse.com bug numbers with bsc# and JIRA epic numbers with jsc#. + Please make sure you reference a JIRA epic when referencing JIRA features. + Have a look at http://en.opensuse.org/openSUSE:Packaging_Patches_guidelines#Current_set_of_abbreviations for a full list of abbreviations. @@ -177,16 +177,27 @@ doing this is using scripts/sequence-patch.sh: $ export SCRATCH_AREA=/var/tmp/scratch $ scripts/sequence-patch.sh - + Creating tree in /var/tmp/scratch/linux-5.3-SLE15-SP3 + Cleaning up from previous run + Linking from /var/tmp/scratch/linux-5.3.orig + Applying 50892 patches using 4 threads... 
+ Saving modified files... + [ Tree: /var/tmp/scratch/linux-5.3-SLE15-SP3 ] + [ Generating Module.supported ] + [ Copying config/x86_64/default ] + +Note the Tree: line output by the sequence-patch.sh script which +specifies the location of the expanded kernel tree that is configured +for local build. Please test-compile the kernel or even test-build kernel packages, depending on the impact of your changes. Use scripts/tar-up.sh for -creating an Autobuild source directory. +creating an OBS package directory. The kernel source tree that scripts/sequence-patch.sh creates can be test compiled as follows: - $ cp config/i386/default $SCRATCH_AREA/linux-2.6.18 - $ cd $SCRATCH_AREA/linux-2.6.18 + $ cp config/i386/default /var/tmp/scratch/linux-5.3-SLE15-SP3 + $ cd /var/tmp/scratch/linux-5.3-SLE15-SP3 $ make oldconfig $ make @@ -219,7 +230,7 @@ When adding patches that add kernel config options, please also update all config files as follows: $ scripts/sequence-patch.sh - $ cd /var/tmp/scratch/linux-2.6.16 + $ cd /var/tmp/scratch/linux-5.3-SLE15-SP3 $ patches/scripts/run_oldconfig.sh diff --git a/blacklist.conf b/blacklist.conf index 9940d7f..17fccff 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -1948,3 +1948,11 @@ bc9a2e226ea95e1699f7590845554de095308b75 # not needed in kernels older than SLE1 2ca11b0e043be6f5c2b188897e9a32275eaab046 # code comment 7eb000bdbe7c7da811ef51942b356f6e819b13ba # already applied f87777a3c30cf50c66a20e1d153f0e003bb30774 # net: stmmac: not needed +0f12156dff2862ac54235fc72703f18770769042 # memcg: reverted in merge window +3754707bcc3e190e5dadc978d172b61e809cb3bd # memcg: revert the above +b574ce3ee45937f4a01edc98c59213bfc7effe50 # prerequisites are too intrusive +45cb6653b0c355fc1445a8069ba78a4ce8720511 # kABI +f7f2b43eaf6b4cfe54c75100709be31d5c4b52c8 # cosmetic fix +2a1c55d4762dd34a8b0f2e36fb01b7b16b60735b # cosmetic fix +0f12156dff2862ac54235fc72703f18770769042 # memcg: reverted in merge window +3754707bcc3e190e5dadc978d172b61e809cb3bd # 
memcg: revert the above diff --git a/patches.kabi/kabi-fix-bpf_insn_aux_data-revert-sanitize_stack_spill.patch b/patches.kabi/kabi-fix-bpf_insn_aux_data-revert-sanitize_stack_spill.patch new file mode 100644 index 0000000..55b68df --- /dev/null +++ b/patches.kabi/kabi-fix-bpf_insn_aux_data-revert-sanitize_stack_spill.patch @@ -0,0 +1,46 @@ +From: Shung-Hsi Yu +Date: Tue, 7 Sep 2021 13:26:15 +0800 +Subject: [PATCH] kABI: revert change in struct bpf_insn_aux_data + +References: bsc#1188983, bsc#1188985, CVE-2021-34556, CVE-2021-35477 +Patch-mainline: never, kABI + +Revert sanitize_stack_spill to sanitize_stack_off since they're use in +pretty much identical way. +--- + include/linux/bpf_verifier.h | 2 +- + kernel/bpf/verifier.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -175,8 +175,8 @@ struct bpf_insn_aux_data { + u32 alu_limit; /* limit for add/sub register with pointer */ + }; + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ ++ int sanitize_stack_off; /* subject to Spectre v4 sanitation */ + bool seen; /* this insn was processed by the verifier */ +- bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ + u8 alu_state; /* used in combination with alu_limit */ + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1050,7 +1050,7 @@ static int check_stack_write(struct bpf_ + } + + if (sanitize) +- env->insn_aux_data[insn_idx].sanitize_stack_spill = true; ++ env->insn_aux_data[insn_idx].sanitize_stack_off = 1; + } + + if (value_regno >= 0 && +@@ -5981,7 +5981,7 @@ static int convert_ctx_accesses(struct b + } + + if (type == BPF_WRITE && +- env->insn_aux_data[i + delta].sanitize_stack_spill) { ++ env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + *insn, + BPF_ST_NOSPEC(), diff --git a/patches.suse/Bluetooth-fix-repeated-calls-to-sco_sock_kill.patch 
b/patches.suse/Bluetooth-fix-repeated-calls-to-sco_sock_kill.patch index b59fc67..e0e3bea 100644 --- a/patches.suse/Bluetooth-fix-repeated-calls-to-sco_sock_kill.patch +++ b/patches.suse/Bluetooth-fix-repeated-calls-to-sco_sock_kill.patch @@ -54,14 +54,14 @@ Acked-by: Takashi Iwai sock_put(sk); } -@@ -176,7 +174,6 @@ static void sco_conn_del(struct hci_conn +@@ -196,7 +196,6 @@ static void sco_conn_del(struct hci_conn sco_sock_clear_timer(sk); sco_chan_del(sk, err); release_sock(sk); - sco_sock_kill(sk); sock_put(sk); - } + /* Ensure no more work items will run before freeing conn. */ @@ -378,8 +375,7 @@ static void sco_sock_cleanup_listen(stru */ static void sco_sock_kill(struct sock *sk) diff --git a/patches.suse/Bluetooth-schedule-SCO-timeouts-with-delayed_work.patch b/patches.suse/Bluetooth-schedule-SCO-timeouts-with-delayed_work.patch new file mode 100644 index 0000000..aff8be2 --- /dev/null +++ b/patches.suse/Bluetooth-schedule-SCO-timeouts-with-delayed_work.patch @@ -0,0 +1,144 @@ +From ba316be1b6a00db7126ed9a39f9bee434a508043 Mon Sep 17 00:00:00 2001 +From: Desmond Cheong Zhi Xi +Date: Tue, 10 Aug 2021 12:14:05 +0800 +Subject: [PATCH] Bluetooth: schedule SCO timeouts with delayed_work +Git-commit: ba316be1b6a00db7126ed9a39f9bee434a508043 +Patch-mainline: v5.15-rc1 +References: CVE-2021-3640 bsc#1188172 + +struct sock.sk_timer should be used as a sock cleanup timer. However, +SCO uses it to implement sock timeouts. + +This causes issues because struct sock.sk_timer's callback is run in +an IRQ context, and the timer callback function sco_sock_timeout takes +a spin lock on the socket. However, other functions such as +sco_conn_del and sco_conn_ready take the spin lock with interrupts +enabled. 
+ +This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could +lead to deadlocks as reported by Syzbot [1]: + CPU0 + ---- + lock(slock-AF_BLUETOOTH-BTPROTO_SCO); + + lock(slock-AF_BLUETOOTH-BTPROTO_SCO); + +To fix this, we use delayed work to implement SCO sock timouts +instead. This allows us to avoid taking the spin lock on the socket in +an IRQ context, and corrects the misuse of struct sock.sk_timer. + +As a note, cancel_delayed_work is used instead of +cancel_delayed_work_sync in sco_sock_set_timer and +sco_sock_clear_timer to avoid a deadlock. In the future, the call to +bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to +synchronize with other functions using lock_sock. However, since +sco_sock_set_timer and sco_sock_clear_timer are sometimes called under +the locked socket (in sco_connect and __sco_sock_close), +cancel_delayed_work_sync might cause them to sleep until an +sco_sock_timeout that has started finishes running. But +sco_sock_timeout would also sleep until it can grab the lock_sock. + +Using cancel_delayed_work is fine because sco_sock_timeout does not +change from run to run, hence there is no functional difference +Between: +1. waiting for a timeout to finish running before scheduling another +timeout +2. scheduling another timeout while a timeout is running. 
+ +Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b31e [1] +Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com +Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com +Signed-off-by: Desmond Cheong Zhi Xi +Signed-off-by: Luiz Augusto von Dentz +Acked-by: Takashi Iwai + +--- + net/bluetooth/sco.c | 35 +++++++++++++++++++++++++++++------ + 1 file changed, 29 insertions(+), 6 deletions(-) + +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -48,6 +48,8 @@ struct sco_conn { + spinlock_t lock; + struct sock *sk; + ++ struct delayed_work timeout_work; ++ + unsigned int mtu; + }; + +@@ -73,9 +75,20 @@ struct sco_pinfo { + #define SCO_CONN_TIMEOUT (HZ * 40) + #define SCO_DISCONN_TIMEOUT (HZ * 2) + +-static void sco_sock_timeout(unsigned long arg) ++static void sco_sock_timeout(struct work_struct *work) + { +- struct sock *sk = (struct sock *)arg; ++ struct sco_conn *conn = container_of(work, struct sco_conn, ++ timeout_work.work); ++ struct sock *sk; ++ ++ sco_conn_lock(conn); ++ sk = conn->sk; ++ if (sk) ++ sock_hold(sk); ++ sco_conn_unlock(conn); ++ ++ if (!sk) ++ return; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + +@@ -90,14 +103,21 @@ static void sco_sock_timeout(unsigned lo + + static void sco_sock_set_timer(struct sock *sk, long timeout) + { ++ if (!sco_pi(sk)->conn) ++ return; ++ + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); +- sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); ++ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); ++ schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout); + } + + static void sco_sock_clear_timer(struct sock *sk) + { ++ if (!sco_pi(sk)->conn) ++ return; ++ + BT_DBG("sock %p state %d", sk, sk->sk_state); +- sk_stop_timer(sk, &sk->sk_timer); ++ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); + } + + /* ---- SCO connections ---- */ +@@ -178,6 +198,9 @@ static void sco_conn_del(struct hci_conn + bh_unlock_sock(sk); + 
sco_sock_kill(sk); + sock_put(sk); ++ ++ /* Ensure no more work items will run before freeing conn. */ ++ cancel_delayed_work_sync(&conn->timeout_work); + } + + hcon->sco_data = NULL; +@@ -192,6 +215,8 @@ static void __sco_chan_add(struct sco_co + sco_pi(sk)->conn = conn; + conn->sk = sk; + ++ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); ++ + if (parent) + bt_accept_enqueue(parent, sk, true); + } +@@ -488,8 +513,6 @@ static struct sock *sco_sock_alloc(struc + + sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; + +- setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk); +- + bt_sock_link(&sco_sk_list, sk); + return sk; + } diff --git a/patches.suse/Bluetooth-switch-to-lock_sock-in-SCO.patch b/patches.suse/Bluetooth-switch-to-lock_sock-in-SCO.patch index 790de31..b5ca4f3 100644 --- a/patches.suse/Bluetooth-switch-to-lock_sock-in-SCO.patch +++ b/patches.suse/Bluetooth-switch-to-lock_sock-in-SCO.patch @@ -24,7 +24,7 @@ Acked-by: Takashi Iwai --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c -@@ -79,10 +79,10 @@ static void sco_sock_timeout(struct time +@@ -92,10 +92,10 @@ static void sco_sock_timeout(struct work BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -37,7 +37,7 @@ Acked-by: Takashi Iwai sco_sock_kill(sk); sock_put(sk); -@@ -172,10 +172,10 @@ static void sco_conn_del(struct hci_conn +@@ -192,10 +192,10 @@ static void sco_conn_del(struct hci_conn if (sk) { sock_hold(sk); @@ -49,8 +49,8 @@ Acked-by: Takashi Iwai + release_sock(sk); sco_sock_kill(sk); sock_put(sk); - } -@@ -1021,10 +1021,10 @@ static void sco_conn_ready(struct sco_co + +@@ -1047,10 +1047,10 @@ static void sco_conn_ready(struct sco_co if (sk) { sco_sock_clear_timer(sk); @@ -63,7 +63,7 @@ Acked-by: Takashi Iwai } else { sco_conn_lock(conn); -@@ -1039,12 +1039,12 @@ static void sco_conn_ready(struct sco_co +@@ -1065,12 +1065,12 @@ static void sco_conn_ready(struct sco_co return; } @@ -78,7 +78,7 @@ Acked-by: Takashi Iwai sco_conn_unlock(conn); return; } -@@ -1065,7 +1065,7 @@ 
static void sco_conn_ready(struct sco_co +@@ -1091,7 +1091,7 @@ static void sco_conn_ready(struct sco_co /* Wake up parent */ parent->sk_data_ready(parent); diff --git a/patches.suse/PCI-endpoint-Fix-missing-destroy_workqueue.patch b/patches.suse/PCI-endpoint-Fix-missing-destroy_workqueue.patch new file mode 100644 index 0000000..ef71135 --- /dev/null +++ b/patches.suse/PCI-endpoint-Fix-missing-destroy_workqueue.patch @@ -0,0 +1,41 @@ +From acaef7981a218813e3617edb9c01837808de063c Mon Sep 17 00:00:00 2001 +From: Yang Yingliang +Date: Wed, 31 Mar 2021 16:40:12 +0800 +Subject: [PATCH] PCI: endpoint: Fix missing destroy_workqueue() +Git-commit: acaef7981a218813e3617edb9c01837808de063c +References: git-fixes +Patch-mainline: v5.13-rc1 + +Add the missing destroy_workqueue() before return from +pci_epf_test_init() in the error handling case and add +destroy_workqueue() in pci_epf_test_exit(). + +Link: https://lore.kernel.org/r/20210331084012.2091010-1-yangyingliang@huawei.com +Fixes: 349e7a85b25fa ("PCI: endpoint: functions: Add an EP function to test PCI") +Reported-by: Hulk Robot +Signed-off-by: Yang Yingliang +Signed-off-by: Lorenzo Pieralisi +Signed-off-by: Oliver Neukum +--- + drivers/pci/endpoint/functions/pci-epf-test.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/pci/endpoint/functions/pci-epf-test.c ++++ b/drivers/pci/endpoint/functions/pci-epf-test.c +@@ -491,6 +491,7 @@ static int __init pci_epf_test_init(void + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + ret = pci_epf_register_driver(&test_driver); + if (ret) { ++ destroy_workqueue(kpcitest_workqueue); + pr_err("failed to register pci epf test driver --> %d\n", ret); + return ret; + } +@@ -501,6 +502,8 @@ module_init(pci_epf_test_init); + + static void __exit pci_epf_test_exit(void) + { ++ if (kpcitest_workqueue) ++ destroy_workqueue(kpcitest_workqueue); + pci_epf_unregister_driver(&test_driver); + } + module_exit(pci_epf_test_exit); diff --git a/patches.suse/RDMA-efa-Free-IRQ-vectors-on-error-flow.patch 
b/patches.suse/RDMA-efa-Free-IRQ-vectors-on-error-flow.patch new file mode 100644 index 0000000..7fba26c --- /dev/null +++ b/patches.suse/RDMA-efa-Free-IRQ-vectors-on-error-flow.patch @@ -0,0 +1,37 @@ +From dbe986bdfd6dfe6ef24b833767fff4151e024357 Mon Sep 17 00:00:00 2001 +From: Gal Pressman +Date: Wed, 11 Aug 2021 18:11:28 +0300 +Subject: [PATCH 1/1] RDMA/efa: Free IRQ vectors on error flow +Git-commit: dbe986bdfd6dfe6ef24b833767fff4151e024357 +Patch-mainline: v5.14 +References: git-fixes + +Make sure to free the IRQ vectors in case the allocation doesn't return +the expected number of IRQs. + +Fixes: b7f5e880f377 ("RDMA/efa: Add the efa module") +Link: https://lore.kernel.org/r/20210811151131.39138-2-galpress@amazon.com +Reviewed-by: Firas JahJah +Reviewed-by: Yossi Leybovich +Signed-off-by: Gal Pressman +Signed-off-by: Jason Gunthorpe +Acked-by: Nicolas Morey-Chaisemartin +--- + drivers/infiniband/hw/efa/efa_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c +index 203e6ddcacbc..be4a07bd268a 100644 +--- a/drivers/infiniband/hw/efa/efa_main.c ++++ b/drivers/infiniband/hw/efa/efa_main.c +@@ -357,6 +357,7 @@ static int efa_enable_msix(struct efa_dev *dev) + } + + if (irq_num != msix_vecs) { ++ efa_disable_msix(dev); + dev_err(&dev->pdev->dev, + "Allocated %d MSI-X (out of %d requested)\n", + irq_num, msix_vecs); +-- +2.31.1.5.g533053588dc3 + diff --git a/patches.suse/RDMA-efa-Remove-double-QP-type-assignment.patch b/patches.suse/RDMA-efa-Remove-double-QP-type-assignment.patch new file mode 100644 index 0000000..5d789c8 --- /dev/null +++ b/patches.suse/RDMA-efa-Remove-double-QP-type-assignment.patch @@ -0,0 +1,35 @@ +From f9193d266347fe9bed5c173e7a1bf96268142a79 Mon Sep 17 00:00:00 2001 +From: Leon Romanovsky +Date: Fri, 23 Jul 2021 14:39:45 +0300 +Subject: [PATCH 1/1] RDMA/efa: Remove double QP type assignment +Git-commit: f9193d266347fe9bed5c173e7a1bf96268142a79 +Patch-mainline: 
v5.15-rc1 +References: git-fixes + +The QP type is set by the IB/core and shouldn't be set in the driver. + +Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") +Link: https://lore.kernel.org/r/838c40134c1590167b888ca06ad51071139ff2ae.1627040189.git.leonro@nvidia.com +Acked-by: Gal Pressman +Signed-off-by: Leon Romanovsky +Signed-off-by: Jason Gunthorpe +Acked-by: Nicolas Morey-Chaisemartin +--- + drivers/infiniband/hw/efa/efa_verbs.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c +index b4cfb656ddd5..b1c4780e86be 100644 +--- a/drivers/infiniband/hw/efa/efa_verbs.c ++++ b/drivers/infiniband/hw/efa/efa_verbs.c +@@ -926,7 +926,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + rq_entry_inserted = true; + qp->qp_handle = create_qp_resp.qp_handle; + qp->ibqp.qp_num = create_qp_resp.qp_num; +- qp->ibqp.qp_type = init_attr->qp_type; + qp->max_send_wr = init_attr->cap.max_send_wr; + qp->max_recv_wr = init_attr->cap.max_recv_wr; + qp->max_send_sge = init_attr->cap.max_send_sge; +-- +2.31.1.5.g533053588dc3 + diff --git a/patches.suse/SUNRPC-Simplify-socket-shutdown-when-not-reusing-TCP.patch b/patches.suse/SUNRPC-Simplify-socket-shutdown-when-not-reusing-TCP.patch new file mode 100644 index 0000000..c0d0f68 --- /dev/null +++ b/patches.suse/SUNRPC-Simplify-socket-shutdown-when-not-reusing-TCP.patch @@ -0,0 +1,33 @@ +From: Trond Myklebust +Date: Tue, 24 Aug 2021 11:38:17 -0400 +Subject: [PATCH] SUNRPC: Simplify socket shutdown when not reusing TCP ports +Git-commit: 0a6ff58edbfb26469a095ab964095506352fc960 +Patch-mainline: v5.15-rc1 +References: git-fixes + +If we're not required to reuse the TCP port, then we can just +immediately close the socket, and leave the cleanup details to the TCP +layer. 
+ +Fixes: e6237b6feb37 ("NFSv4.1: Don't rebind to the same source port when reconnecting to the server") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Acked-by: NeilBrown + +--- + net/sunrpc/xprtsock.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2260,6 +2260,10 @@ static void xs_tcp_shutdown(struct rpc_x + + if (sock == NULL) + return; ++ if (!xprt->reuseport) { ++ xs_close(xprt); ++ return; ++ } + if (xprt_connected(xprt)) { + kernel_sock_shutdown(sock, SHUT_RDWR); + trace_rpc_socket_shutdown(xprt, sock); diff --git a/patches.suse/SUNRPC-improve-error-response-to-over-size-gss-crede.patch b/patches.suse/SUNRPC-improve-error-response-to-over-size-gss-crede.patch new file mode 100644 index 0000000..146513c --- /dev/null +++ b/patches.suse/SUNRPC-improve-error-response-to-over-size-gss-crede.patch @@ -0,0 +1,50 @@ +From: NeilBrown +Date: Thu, 2 Sep 2021 09:30:37 +1000 +Subject: [PATCH] SUNRPC: improve error response to over-size gss credential +Git-commit: 0c217d5066c84f67cd672cf03ec8f682e5d013c2 +Patch-mainline: v5.15-rc1 +References: bsc#1190022 + +When the NFS server receives a large gss (kerberos) credential and tries +to pass it up to rpc.svcgssd (which is deprecated), it triggers an +infinite loop in cache_read(). + +cache_request() always returns -EAGAIN, and this causes a "goto again". + +This patch: + - changes the error to -E2BIG to avoid the infinite loop, and + - generates a WARN_ONCE when rsi_request first sees an over-sized + credential. The warning suggests switching to gssproxy. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=196583 +Signed-off-by: NeilBrown +Signed-off-by: Chuck Lever +Acked-by: NeilBrown + +--- + net/sunrpc/auth_gss/svcauth_gss.c | 2 ++ + net/sunrpc/cache.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/net/sunrpc/auth_gss/svcauth_gss.c ++++ b/net/sunrpc/auth_gss/svcauth_gss.c +@@ -180,6 +180,8 @@ static void rsi_request(struct cache_det + qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); + qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); + (*bpp)[-1] = '\n'; ++ WARN_ONCE(*blen < 0, ++ "RPCSEC/GSS credential too large - please use gssproxy\n"); + } + + static int rsi_parse(struct cache_detail *cd, +--- a/net/sunrpc/cache.c ++++ b/net/sunrpc/cache.c +@@ -770,7 +770,7 @@ static int cache_request(struct cache_de + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) +- return -EAGAIN; ++ return -E2BIG; + return PAGE_SIZE - len; + } + diff --git a/patches.suse/bpf-Fix-leakage-due-to-insufficient-speculative-stor.patch b/patches.suse/bpf-Fix-leakage-due-to-insufficient-speculative-stor.patch new file mode 100644 index 0000000..a8c1125 --- /dev/null +++ b/patches.suse/bpf-Fix-leakage-due-to-insufficient-speculative-stor.patch @@ -0,0 +1,427 @@ +From: Daniel Borkmann +Date: Tue, 13 Jul 2021 08:18:31 +0000 +Subject: bpf: Fix leakage due to insufficient speculative store bypass + mitigation +Patch-mainline: v5.14-rc4 +Git-commit: 2039f26f3aca5b0e419b98f65dd36481337b86ee +References: bsc#1188983, bsc#1188985, CVE-2021-34556, CVE-2021-35477 + +Spectre v4 gadgets make use of memory disambiguation, which is a set of +techniques that execute memory access instructions, that is, loads and +stores, out of program order; Intel's optimization manual, section 2.4.4.5: + + A load instruction micro-op may depend on a preceding store. Many + microarchitectures block loads until all preceding store addresses are + known. 
The memory disambiguator predicts which loads will not depend on + any previous stores. When the disambiguator predicts that a load does + not have such a dependency, the load takes its data from the L1 data + cache. Eventually, the prediction is verified. If an actual conflict is + detected, the load and all succeeding instructions are re-executed. + +af86ca4e3088 ("bpf: Prevent memory disambiguation attack") tried to mitigate +this attack by sanitizing the memory locations through preemptive "fast" +(low latency) stores of zero prior to the actual "slow" (high latency) store +of a pointer value such that upon dependency misprediction the CPU then +speculatively executes the load of the pointer value and retrieves the zero +value instead of the attacker controlled scalar value previously stored at +that location, meaning, subsequent access in the speculative domain is then +redirected to the "zero page". + +The sanitized preemptive store of zero prior to the actual "slow" store is +done through a simple ST instruction based on r10 (frame pointer) with +relative offset to the stack location that the verifier has been tracking +on the original used register for STX, which does not have to be r10. Thus, +there are no memory dependencies for this store, since it's only using r10 +and immediate constant of zero; hence af86ca4e3088 /assumed/ a low latency +operation. + +However, a recent attack demonstrated that this mitigation is not sufficient +since the preemptive store of zero could also be turned into a "slow" store +and is thus bypassed as well: + + [...] + // r2 = oob address (e.g. scalar) + // r7 = pointer to map value + 31: (7b) *(u64 *)(r10 -16) = r2 + // r9 will remain "fast" register, r10 will become "slow" register below + 32: (bf) r9 = r10 + // JIT maps BPF reg to x86 reg: + // r9 -> r15 (callee saved) + // r10 -> rbp + // train store forward prediction to break dependency link between both r9 + // and r10 by evicting them from the predictor's LRU table. 
+ 33: (61) r0 = *(u32 *)(r7 +24576) + 34: (63) *(u32 *)(r7 +29696) = r0 + 35: (61) r0 = *(u32 *)(r7 +24580) + 36: (63) *(u32 *)(r7 +29700) = r0 + 37: (61) r0 = *(u32 *)(r7 +24584) + 38: (63) *(u32 *)(r7 +29704) = r0 + 39: (61) r0 = *(u32 *)(r7 +24588) + 40: (63) *(u32 *)(r7 +29708) = r0 + [...] + 543: (61) r0 = *(u32 *)(r7 +25596) + 544: (63) *(u32 *)(r7 +30716) = r0 + // prepare call to bpf_ringbuf_output() helper. the latter will cause rbp + // to spill to stack memory while r13/r14/r15 (all callee saved regs) remain + // in hardware registers. rbp becomes slow due to push/pop latency. below is + // disasm of bpf_ringbuf_output() helper for better visual context: + // + // ffffffff8117ee20: 41 54 push r12 + // ffffffff8117ee22: 55 push rbp + // ffffffff8117ee23: 53 push rbx + // ffffffff8117ee24: 48 f7 c1 fc ff ff ff test rcx,0xfffffffffffffffc + // ffffffff8117ee2b: 0f 85 af 00 00 00 jne ffffffff8117eee0 <-- jump taken + // [...] + // ffffffff8117eee0: 49 c7 c4 ea ff ff ff mov r12,0xffffffffffffffea + // ffffffff8117eee7: 5b pop rbx + // ffffffff8117eee8: 5d pop rbp + // ffffffff8117eee9: 4c 89 e0 mov rax,r12 + // ffffffff8117eeec: 41 5c pop r12 + // ffffffff8117eeee: c3 ret + 545: (18) r1 = map[id:4] + 547: (bf) r2 = r7 + 548: (b7) r3 = 0 + 549: (b7) r4 = 4 + 550: (85) call bpf_ringbuf_output#194288 + // instruction 551 inserted by verifier \ + 551: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here + // storing map value pointer r7 at fp-16 | since value of r10 is "slow". + 552: (7b) *(u64 *)(r10 -16) = r7 / + // following "fast" read to the same memory location, but due to dependency + // misprediction it will speculatively execute before insn 551/552 completes. + 553: (79) r2 = *(u64 *)(r9 -16) + // in speculative domain contains attacker controlled r2. in non-speculative + // domain this contains r7, and thus accesses r7 +0 below. 
+ 554: (71) r3 = *(u8 *)(r2 +0) + // leak r3 + +As can be seen, the current speculative store bypass mitigation which the +verifier inserts at line 551 is insufficient since /both/, the write of +the zero sanitation as well as the map value pointer are a high latency +instruction due to prior memory access via push/pop of r10 (rbp) in contrast +to the low latency read in line 553 as r9 (r15) which stays in hardware +registers. Thus, architecturally, fp-16 is r7, however, microarchitecturally, +fp-16 can still be r2. + +Initial thoughts to address this issue was to track spilled pointer loads +from stack and enforce their load via LDX through r10 as well so that /both/ +the preemptive store of zero /as well as/ the load use the /same/ register +such that a dependency is created between the store and load. However, this +option is not sufficient either since it can be bypassed as well under +speculation. An updated attack with pointer spill/fills now _all_ based on +r10 would look as follows: + + [...] + // r2 = oob address (e.g. scalar) + // r7 = pointer to map value + [...] + // longer store forward prediction training sequence than before. + 2062: (61) r0 = *(u32 *)(r7 +25588) + 2063: (63) *(u32 *)(r7 +30708) = r0 + 2064: (61) r0 = *(u32 *)(r7 +25592) + 2065: (63) *(u32 *)(r7 +30712) = r0 + 2066: (61) r0 = *(u32 *)(r7 +25596) + 2067: (63) *(u32 *)(r7 +30716) = r0 + // store the speculative load address (scalar) this time after the store + // forward prediction training. + 2068: (7b) *(u64 *)(r10 -16) = r2 + // preoccupy the CPU store port by running sequence of dummy stores. 
+ 2069: (63) *(u32 *)(r7 +29696) = r0 + 2070: (63) *(u32 *)(r7 +29700) = r0 + 2071: (63) *(u32 *)(r7 +29704) = r0 + 2072: (63) *(u32 *)(r7 +29708) = r0 + 2073: (63) *(u32 *)(r7 +29712) = r0 + 2074: (63) *(u32 *)(r7 +29716) = r0 + 2075: (63) *(u32 *)(r7 +29720) = r0 + 2076: (63) *(u32 *)(r7 +29724) = r0 + 2077: (63) *(u32 *)(r7 +29728) = r0 + 2078: (63) *(u32 *)(r7 +29732) = r0 + 2079: (63) *(u32 *)(r7 +29736) = r0 + 2080: (63) *(u32 *)(r7 +29740) = r0 + 2081: (63) *(u32 *)(r7 +29744) = r0 + 2082: (63) *(u32 *)(r7 +29748) = r0 + 2083: (63) *(u32 *)(r7 +29752) = r0 + 2084: (63) *(u32 *)(r7 +29756) = r0 + 2085: (63) *(u32 *)(r7 +29760) = r0 + 2086: (63) *(u32 *)(r7 +29764) = r0 + 2087: (63) *(u32 *)(r7 +29768) = r0 + 2088: (63) *(u32 *)(r7 +29772) = r0 + 2089: (63) *(u32 *)(r7 +29776) = r0 + 2090: (63) *(u32 *)(r7 +29780) = r0 + 2091: (63) *(u32 *)(r7 +29784) = r0 + 2092: (63) *(u32 *)(r7 +29788) = r0 + 2093: (63) *(u32 *)(r7 +29792) = r0 + 2094: (63) *(u32 *)(r7 +29796) = r0 + 2095: (63) *(u32 *)(r7 +29800) = r0 + 2096: (63) *(u32 *)(r7 +29804) = r0 + 2097: (63) *(u32 *)(r7 +29808) = r0 + 2098: (63) *(u32 *)(r7 +29812) = r0 + // overwrite scalar with dummy pointer; same as before, also including the + // sanitation store with 0 from the current mitigation by the verifier. + 2099: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here + 2100: (7b) *(u64 *)(r10 -16) = r7 | since store unit is still busy. + // load from stack intended to bypass stores. + 2101: (79) r2 = *(u64 *)(r10 -16) + 2102: (71) r3 = *(u8 *)(r2 +0) + // leak r3 + [...] + +Looking at the CPU microarchitecture, the scheduler might issue loads (such +as seen in line 2101) before stores (line 2099,2100) because the load execution +units become available while the store execution unit is still busy with the +sequence of dummy stores (line 2069-2098). And so the load may use the prior +stored scalar from r2 at address r10 -16 for speculation. 
The updated attack +may work less reliable on CPU microarchitectures where loads and stores share +execution resources. + +This concludes that the sanitizing with zero stores from af86ca4e3088 ("bpf: +Prevent memory disambiguation attack") is insufficient. Moreover, the detection +of stack reuse from af86ca4e3088 where previously data (STACK_MISC) has been +written to a given stack slot where a pointer value is now to be stored does +not have sufficient coverage as precondition for the mitigation either; for +several reasons outlined as follows: + + 1) Stack content from prior program runs could still be preserved and is + therefore not "random", best example is to split a speculative store + bypass attack between tail calls, program A would prepare and store the + oob address at a given stack slot and then tail call into program B which + does the "slow" store of a pointer to the stack with subsequent "fast" + read. From program B PoV such stack slot type is STACK_INVALID, and + therefore also must be subject to mitigation. + + 2) The STACK_SPILL must not be coupled to register_is_const(&stack->spilled_ptr) + condition, for example, the previous content of that memory location could + also be a pointer to map or map value. Without the fix, a speculative + store bypass is not mitigated in such precondition and can then lead to + a type confusion in the speculative domain leaking kernel memory near + these pointer types. + +While brainstorming on various alternative mitigation possibilities, we also +stumbled upon a retrospective from Chrome developers [0]: + + [...] For variant 4, we implemented a mitigation to zero the unused memory + of the heap prior to allocation, which cost about 1% when done concurrently + and 4% for scavenging. Variant 4 defeats everything we could think of. We + explored more mitigations for variant 4 but the threat proved to be more + pervasive and dangerous than we anticipated. 
For example, stack slots used + by the register allocator in the optimizing compiler could be subject to + type confusion, leading to pointer crafting. Mitigating type confusion for + stack slots alone would have required a complete redesign of the backend of + the optimizing compiler, perhaps man years of work, without a guarantee of + completeness. [...] + +From BPF side, the problem space is reduced, however, options are rather +limited. One idea that has been explored was to xor-obfuscate pointer spills +to the BPF stack: + + [...] + // preoccupy the CPU store port by running sequence of dummy stores. + [...] + 2106: (63) *(u32 *)(r7 +29796) = r0 + 2107: (63) *(u32 *)(r7 +29800) = r0 + 2108: (63) *(u32 *)(r7 +29804) = r0 + 2109: (63) *(u32 *)(r7 +29808) = r0 + 2110: (63) *(u32 *)(r7 +29812) = r0 + // overwrite scalar with dummy pointer; xored with random 'secret' value + // of 943576462 before store ... + 2111: (b4) w11 = 943576462 + 2112: (af) r11 ^= r7 + 2113: (7b) *(u64 *)(r10 -16) = r11 + 2114: (79) r11 = *(u64 *)(r10 -16) + 2115: (b4) w2 = 943576462 + 2116: (af) r2 ^= r11 + // ... and restored with the same 'secret' value with the help of AX reg. + 2117: (71) r3 = *(u8 *)(r2 +0) + [...] + +While the above would not prevent speculation, it would make data leakage +infeasible by directing it to random locations. In order to be effective +and prevent type confusion under speculation, such random secret would have +to be regenerated for each store. The additional complexity involved for a +tracking mechanism that prevents jumps such that restoring spilled pointers +would not get corrupted is not worth the gain for unprivileged. Hence, the +fix in here eventually opted for emitting a non-public BPF_ST | BPF_NOSPEC +instruction which the x86 JIT translates into a lfence opcode. Inserting the +latter in between the store and load instruction is one of the mitigations +options [1]. The x86 instruction manual notes: + + [...] 
An LFENCE that follows an instruction that stores to memory might + complete before the data being stored have become globally visible. [...] + +The latter meaning that the preceding store instruction finished execution +and the store is at minimum guaranteed to be in the CPU's store queue, but +it's not guaranteed to be in that CPU's L1 cache at that point (globally +visible). The latter would only be guaranteed via sfence. So the load which +is guaranteed to execute after the lfence for that local CPU would have to +rely on store-to-load forwarding. [2], in section 2.3 on store buffers says: + + [...] For every store operation that is added to the ROB, an entry is + allocated in the store buffer. This entry requires both the virtual and + physical address of the target. Only if there is no free entry in the store + buffer, the frontend stalls until there is an empty slot available in the + store buffer again. Otherwise, the CPU can immediately continue adding + subsequent instructions to the ROB and execute them out of order. On Intel + CPUs, the store buffer has up to 56 entries. [...] + +One small upside on the fix is that it lifts constraints from af86ca4e3088 +where the sanitize_stack_off relative to r10 must be the same when coming +from different paths. The BPF_ST | BPF_NOSPEC gets emitted after a BPF_STX +or BPF_ST instruction. This happens either when we store a pointer or data +value to the BPF stack for the first time, or upon later pointer spills. +The former needs to be enforced since otherwise stale stack data could be +leaked under speculation as outlined earlier. For non-x86 JITs the BPF_ST | +BPF_NOSPEC mapping is currently optimized away, but others could emit a +speculation barrier as well if necessary. For real-world unprivileged +programs e.g. generated by LLVM, pointer spill/fill is only generated upon +register pressure and LLVM only tries to do that for pointers which are not +used often. 
The program main impact will be the initial BPF_ST | BPF_NOSPEC +sanitation for the STACK_INVALID case when the first write to a stack slot +occurs e.g. upon map lookup. In future we might refine ways to mitigate +the latter cost. + + [0] https://arxiv.org/pdf/1902.05178.pdf + [1] https://msrc-blog.microsoft.com/2018/05/21/analysis-and-mitigation-of-speculative-store-bypass-cve-2018-3639/ + [2] https://arxiv.org/pdf/1905.05725.pdf + +Fixes: af86ca4e3088 ("bpf: Prevent memory disambiguation attack") +Fixes: f7cf25b2026d ("bpf: track spill/fill of constants") +Co-developed-by: Piotr Krysiuk +Co-developed-by: Benedict Schlueter +Signed-off-by: Daniel Borkmann +Signed-off-by: Piotr Krysiuk +Signed-off-by: Benedict Schlueter +Acked-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf_verifier.h | 2 - + kernel/bpf/verifier.c | 76 +++++++++++++++++++------------------------ + 2 files changed, 35 insertions(+), 43 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -171,8 +171,8 @@ struct bpf_insn_aux_data { + u32 alu_limit; /* limit for add/sub register with pointer */ + }; + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ +- int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ ++ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ + u8 alu_state; /* used in combination with alu_limit */ + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1039,6 +1039,20 @@ static int check_stack_write(struct bpf_ + } + + cur = env->cur_state->frame[env->cur_state->curframe]; ++ if (!env->allow_ptr_leaks) { ++ bool sanitize = value_regno >= 0 && is_spillable_regtype(state->regs[value_regno].type); ++ ++ for (i = 0; i < size; i++) { ++ if (state->stack[spi].slot_type[i] == STACK_INVALID) { ++ sanitize = true; ++ break; ++ } ++ } ++ ++ if (sanitize) ++ env->insn_aux_data[insn_idx].sanitize_stack_spill = true; ++ } ++ + if 
(value_regno >= 0 && + is_spillable_regtype((type = cur->regs[value_regno].type))) { + +@@ -1058,30 +1072,6 @@ static int check_stack_write(struct bpf_ + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + + for (i = 0; i < BPF_REG_SIZE; i++) { +- if (state->stack[spi].slot_type[i] == STACK_MISC && +- !env->allow_ptr_leaks) { +- int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; +- int soff = (-spi - 1) * BPF_REG_SIZE; +- +- /* detected reuse of integer stack slot with a pointer +- * which means either llvm is reusing stack slot or +- * an attacker is trying to exploit CVE-2018-3639 +- * (speculative store bypass) +- * Have to sanitize that slot with preemptive +- * store of zero. +- */ +- if (*poff && *poff != soff) { +- /* disallow programs where single insn stores +- * into two different stack slots, since verifier +- * cannot sanitize them +- */ +- verbose(env, +- "insn %d cannot access two stack slots fp%d and fp%d", +- insn_idx, *poff, soff); +- return -EINVAL; +- } +- *poff = soff; +- } + state->stack[spi].slot_type[i] = STACK_SPILL; + } + } else { +@@ -5968,34 +5958,33 @@ static int convert_ctx_accesses(struct b + insn = env->prog->insnsi + delta; + + for (i = 0; i < insn_cnt; i++, insn++) { ++ bool ctx_access; ++ + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || + insn->code == (BPF_LDX | BPF_MEM | BPF_H) || + insn->code == (BPF_LDX | BPF_MEM | BPF_W) || +- insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) ++ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { + type = BPF_READ; +- else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || +- insn->code == (BPF_STX | BPF_MEM | BPF_H) || +- insn->code == (BPF_STX | BPF_MEM | BPF_W) || +- insn->code == (BPF_STX | BPF_MEM | BPF_DW)) ++ ctx_access = true; ++ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_H) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_W) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_DW) || ++ insn->code == (BPF_ST | BPF_MEM | BPF_B) || ++ 
insn->code == (BPF_ST | BPF_MEM | BPF_H) || ++ insn->code == (BPF_ST | BPF_MEM | BPF_W) || ++ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { + type = BPF_WRITE; +- else ++ ctx_access = BPF_CLASS(insn->code) == BPF_STX; ++ } else { + continue; ++ } + + if (type == BPF_WRITE && +- env->insn_aux_data[i + delta].sanitize_stack_off) { ++ env->insn_aux_data[i + delta].sanitize_stack_spill) { + struct bpf_insn patch[] = { +- /* Sanitize suspicious stack slot with zero. +- * There are no memory dependencies for this store, +- * since it's only using frame pointer and immediate +- * constant of zero +- */ +- BPF_ST_MEM(BPF_DW, BPF_REG_FP, +- env->insn_aux_data[i + delta].sanitize_stack_off, +- 0), +- /* the original STX instruction will immediately +- * overwrite the same stack slot with appropriate value +- */ + *insn, ++ BPF_ST_NOSPEC(), + }; + + cnt = ARRAY_SIZE(patch); +@@ -6009,6 +5998,9 @@ static int convert_ctx_accesses(struct b + continue; + } + ++ if (!ctx_access) ++ continue; ++ + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + continue; + diff --git a/patches.suse/bpf-Introduce-BPF-nospec-instruction-for-mitigating-.patch b/patches.suse/bpf-Introduce-BPF-nospec-instruction-for-mitigating-.patch new file mode 100644 index 0000000..fe5f65e --- /dev/null +++ b/patches.suse/bpf-Introduce-BPF-nospec-instruction-for-mitigating-.patch @@ -0,0 +1,206 @@ +From: Daniel Borkmann +Date: Tue, 13 Jul 2021 08:18:31 +0000 +Subject: bpf: Introduce BPF nospec instruction for mitigating Spectre v4 +Patch-mainline: v5.14-rc4 +Git-commit: f5e81d1117501546b7be050c5fbafa6efd2c722c +References: bsc#1188983, bsc#1188985, CVE-2021-34556, CVE-2021-35477 + +In case of JITs, each of the JIT backends compiles the BPF nospec instruction +/either/ to a machine instruction which emits a speculation barrier /or/ to +/no/ machine instruction in case the underlying architecture is not affected +by Speculative Store Bypass or has different mitigations in place already. 
+ +This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence' +instruction for mitigation. In case of arm64, we rely on the firmware mitigation +as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled, +it works for all of the kernel code with no need to provide any additional +instructions here (hence only comment in arm64 JIT). Other archs can follow +as needed. The BPF nospec instruction is specifically targeting Spectre v4 +since i) we don't use a serialization barrier for the Spectre v1 case, and +ii) mitigation instructions for v1 and v4 might be different on some archs. + +The BPF nospec is required for a future commit, where the BPF verifier does +annotate intermediate BPF programs with speculation barriers. + +Co-developed-by: Piotr Krysiuk +Co-developed-by: Benedict Schlueter +Signed-off-by: Daniel Borkmann +Signed-off-by: Piotr Krysiuk +Signed-off-by: Benedict Schlueter +Acked-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- +syu: Drop RISCV, ARM and x86 arch code. +--- + arch/arm64/net/bpf_jit_comp.c | 13 +++++++++++++ + arch/powerpc/net/bpf_jit_comp64.c | 6 ++++++ + arch/s390/net/bpf_jit_comp.c | 5 +++++ + arch/sparc/net/bpf_jit_comp_64.c | 3 +++ + arch/x86/net/bpf_jit_comp.c | 7 +++++++ + include/linux/filter.h | 15 +++++++++++++++ + kernel/bpf/core.c | 18 +++++++++++++++++- + kernel/bpf/disasm.c | 2 ++ + 8 files changed, 68 insertions(+), 1 deletion(-) + +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -659,6 +659,19 @@ emit_cond_jmp: + } + break; + ++ /* speculation barrier */ ++ case BPF_ST | BPF_NOSPEC: ++ /* ++ * Nothing required here. ++ * ++ * In case of arm64, we rely on the firmware mitigation of ++ * Speculative Store Bypass as controlled via the ssbd kernel ++ * parameter. Whenever the mitigation is enabled, it works ++ * for all of the kernel code with no need to provide any ++ * additional instructions. 
++ */ ++ break; ++ + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -643,6 +643,12 @@ emit_clear: + break; + + /* ++ * BPF_ST NOSPEC (speculation barrier) ++ */ ++ case BPF_ST | BPF_NOSPEC: ++ break; ++ ++ /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -930,6 +930,11 @@ static noinline int bpf_jit_insn(struct + } + break; + /* ++ * BPF_NOSPEC (speculation barrier) ++ */ ++ case BPF_ST | BPF_NOSPEC: ++ break; ++ /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */ +--- a/arch/sparc/net/bpf_jit_comp_64.c ++++ b/arch/sparc/net/bpf_jit_comp_64.c +@@ -1275,6 +1275,9 @@ static int build_insn(const struct bpf_i + emit(opcode | RS1(src) | rs2 | RD(dst), ctx); + break; + } ++ /* speculation barrier */ ++ case BPF_ST | BPF_NOSPEC: ++ break; + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -715,6 +715,13 @@ static int do_jit(struct bpf_prog *bpf_p + } + break; + ++ /* speculation barrier */ ++ case BPF_ST | BPF_NOSPEC: ++ if (boot_cpu_has(X86_FEATURE_XMM2)) ++ /* Emit 'lfence' */ ++ EMIT3(0x0F, 0xAE, 0xE8); ++ break; ++ + /* ST: *(u8*)(dst_reg + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + if (is_ereg(dst_reg)) +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -62,6 +62,11 @@ struct xdp_buff; + /* unused opcode to mark call to interpreter with arguments */ + #define BPF_CALL_ARGS 0xe0 + ++/* unused opcode to mark speculation barrier for mitigating ++ * Speculative Store Bypass ++ */ ++#define BPF_NOSPEC 0xc0 ++ + /* As per nm, we expose JITed images as text (code) section for + * kallsyms. 
That way, tools like perf can find it to match + * addresses. +@@ -326,6 +331,16 @@ struct xdp_buff; + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ ++ .imm = 0 }) ++ ++/* Speculation barrier */ ++ ++#define BPF_ST_NOSPEC() \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ST | BPF_NOSPEC, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = 0, \ + .imm = 0 }) + + /* Internal classic blocks for direct assignment */ +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -33,6 +33,7 @@ + #include + #include + ++#include + #include + + /* Registers */ +@@ -1057,6 +1058,7 @@ static u64 ___bpf_prog_run(u64 *regs, co + /* Non-UAPI available opcodes. */ + [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, + [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, ++ [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, + }; + #undef BPF_INSN_3_LBL + #undef BPF_INSN_2_LBL +@@ -1362,7 +1364,21 @@ out: + JMP_EXIT: + return BPF_R0; + +- /* STX and ST and LDX*/ ++ /* ST, STX and LDX*/ ++ ST_NOSPEC: ++ /* Speculation barrier for mitigating Speculative Store Bypass. ++ * In case of arm64, we rely on the firmware mitigation as ++ * controlled via the ssbd kernel parameter. Whenever the ++ * mitigation is enabled, it works for all of the kernel code ++ * with no need to provide any additional instructions here. ++ * In case of x86, we use 'lfence' insn for mitigation. We ++ * reuse preexisting logic from Spectre v1 mitigation that ++ * happens to produce the required code on x86 for v4 as well. 
++ */ ++#ifdef CONFIG_X86 ++ barrier_nospec(); ++#endif ++ CONT; + #define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ +--- a/kernel/bpf/disasm.c ++++ b/kernel/bpf/disasm.c +@@ -162,6 +162,8 @@ void print_bpf_insn(const struct bpf_ins + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->src_reg); ++ else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) ++ verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + else if (BPF_MODE(insn->code) == BPF_XADD) + verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", + insn->code, diff --git a/patches.suse/crypto-picoxcell-Fix-error-handling-in-spacc_probe.patch b/patches.suse/crypto-picoxcell-Fix-error-handling-in-spacc_probe.patch new file mode 100644 index 0000000..a805e4d --- /dev/null +++ b/patches.suse/crypto-picoxcell-Fix-error-handling-in-spacc_probe.patch @@ -0,0 +1,92 @@ +From 2d55807b7f7bf62bb05a8b91247c5eb7cd19ac04 Mon Sep 17 00:00:00 2001 +From: Alexey Khoroshilov +Date: Sat, 20 Jan 2018 00:53:15 +0300 +Subject: [PATCH] crypto: picoxcell - Fix error handling in spacc_probe() +Git-commit: 2d55807b7f7bf62bb05a8b91247c5eb7cd19ac04 +References: git-fixes +Patch-mainline: v4.16-rc1 + +If clk_get() fails, device_remove_file() looks inappropriate. + +The error path, where all crypto_register fail, misses resource +deallocations. + +Found by Linux Driver Verification project (linuxtesting.org). 
+ +Signed-off-by: Alexey Khoroshilov +Reviewed-by: Jamie Iles +Signed-off-by: Herbert Xu +Signed-off-by: Oliver Neukum +--- + drivers/crypto/picoxcell_crypto.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c +index 5a6dc53b2b9d..4ef52c9d72fc 100644 +--- a/drivers/crypto/picoxcell_crypto.c ++++ b/drivers/crypto/picoxcell_crypto.c +@@ -1618,7 +1618,7 @@ MODULE_DEVICE_TABLE(of, spacc_of_id_table); + + static int spacc_probe(struct platform_device *pdev) + { +- int i, err, ret = -EINVAL; ++ int i, err, ret; + struct resource *mem, *irq; + struct device_node *np = pdev->dev.of_node; + struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), +@@ -1679,22 +1679,18 @@ static int spacc_probe(struct platform_device *pdev) + engine->clk = clk_get(&pdev->dev, "ref"); + if (IS_ERR(engine->clk)) { + dev_info(&pdev->dev, "clk unavailable\n"); +- device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh); + return PTR_ERR(engine->clk); + } + + if (clk_prepare_enable(engine->clk)) { + dev_info(&pdev->dev, "unable to prepare/enable clk\n"); +- clk_put(engine->clk); +- return -EIO; ++ ret = -EIO; ++ goto err_clk_put; + } + +- err = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); +- if (err) { +- clk_disable_unprepare(engine->clk); +- clk_put(engine->clk); +- return err; +- } ++ ret = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); ++ if (ret) ++ goto err_clk_disable; + + + /* +@@ -1725,6 +1721,7 @@ static int spacc_probe(struct platform_device *pdev) + + platform_set_drvdata(pdev, engine); + ++ ret = -EINVAL; + INIT_LIST_HEAD(&engine->registered_algs); + for (i = 0; i < engine->num_algs; ++i) { + engine->algs[i].engine = engine; +@@ -1759,6 +1756,16 @@ static int spacc_probe(struct platform_device *pdev) + engine->aeads[i].alg.base.cra_name); + } + ++ if (!ret) ++ return 0; ++ ++ del_timer_sync(&engine->packet_timeout); ++ 
device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh); ++err_clk_disable: ++ clk_disable_unprepare(engine->clk); ++err_clk_put: ++ clk_put(engine->clk); ++ + return ret; + } + +-- +2.26.2 + diff --git a/patches.suse/crypto-picoxcell-Fix-potential-race-condition-bug.patch b/patches.suse/crypto-picoxcell-Fix-potential-race-condition-bug.patch new file mode 100644 index 0000000..0266f2a --- /dev/null +++ b/patches.suse/crypto-picoxcell-Fix-potential-race-condition-bug.patch @@ -0,0 +1,49 @@ +From 64f4a62e3b17f1e473f971127c2924cae42afc82 Mon Sep 17 00:00:00 2001 +From: Madhuparna Bhowmik +Date: Tue, 11 Aug 2020 18:00:24 +0530 +Subject: [PATCH] crypto: picoxcell - Fix potential race condition bug +Git-commit: 64f4a62e3b17f1e473f971127c2924cae42afc82 +References: git-fixes +Patch-mainline: v5.10-rc1 + +engine->stat_irq_thresh was initialized after device_create_file() in +the probe function, the initialization may race with call to +spacc_stat_irq_thresh_store() which updates engine->stat_irq_thresh, +therefore initialize it before creating the file in probe function. + +Found by Linux Driver Verification project (linuxtesting.org). + +Fixes: ce92136843cb ("crypto: picoxcell - add support for the...") +Signed-off-by: Madhuparna Bhowmik +Acked-by: Jamie Iles +Signed-off-by: Herbert Xu +Signed-off-by: Oliver Neukum +--- + drivers/crypto/picoxcell_crypto.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/crypto/picoxcell_crypto.c ++++ b/drivers/crypto/picoxcell_crypto.c +@@ -1701,11 +1701,6 @@ static int spacc_probe(struct platform_d + goto err_clk_put; + } + +- ret = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); +- if (ret) +- goto err_clk_disable; +- +- + /* + * Use an IRQ threshold of 50% as a default. 
This seems to be a + * reasonable trade off of latency against throughput but can be +@@ -1713,6 +1708,10 @@ static int spacc_probe(struct platform_d + */ + engine->stat_irq_thresh = (engine->fifo_sz / 2); + ++ ret = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); ++ if (ret) ++ goto err_clk_disable; ++ + /* + * Configure the interrupts. We only use the STAT_CNT interrupt as we + * only submit a new packet for processing when we complete another in diff --git a/patches.suse/crypto-picoxcell-adjust-the-position-of-tasklet_init.patch b/patches.suse/crypto-picoxcell-adjust-the-position-of-tasklet_init.patch index 93b4a44..3b6e1f1 100644 --- a/patches.suse/crypto-picoxcell-adjust-the-position-of-tasklet_init.patch +++ b/patches.suse/crypto-picoxcell-adjust-the-position-of-tasklet_init.patch @@ -34,7 +34,7 @@ Acked-by: Takashi Iwai + static int spacc_probe(struct platform_device *pdev) { - int i, err, ret = -EINVAL; + int i, err, ret; @@ -1659,6 +1664,14 @@ static int spacc_probe(struct platform_d return -ENXIO; } @@ -50,7 +50,7 @@ Acked-by: Takashi Iwai if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); -@@ -1721,8 +1734,6 @@ static int spacc_probe(struct platform_d +@@ -1717,8 +1730,6 @@ static int spacc_probe(struct platform_d INIT_LIST_HEAD(&engine->completed); INIT_LIST_HEAD(&engine->in_progress); engine->in_flight = 0; diff --git a/patches.suse/crypto-qat-use-proper-type-for-vf_mask.patch b/patches.suse/crypto-qat-use-proper-type-for-vf_mask.patch new file mode 100644 index 0000000..40435f3 --- /dev/null +++ b/patches.suse/crypto-qat-use-proper-type-for-vf_mask.patch @@ -0,0 +1,71 @@ +From 462354d986b6a89c6449b85f17aaacf44e455216 Mon Sep 17 00:00:00 2001 +From: Giovanni Cabiddu +Date: Thu, 12 Aug 2021 21:21:10 +0100 +Subject: [PATCH] crypto: qat - use proper type for vf_mask +Git-commit: 462354d986b6a89c6449b85f17aaacf44e455216 +References: git-fixes +Patch-mainline: 
v5.15-rc1 + +Replace vf_mask type with unsigned long to avoid a stack-out-of-bound. + +This is to fix the following warning reported by KASAN the first time +adf_msix_isr_ae() gets called. + + [ 692.091987] BUG: KASAN: stack-out-of-bounds in find_first_bit+0x28/0x50 + [ 692.092017] Read of size 8 at addr ffff88afdf789e60 by task swapper/32/0 + [ 692.092076] Call Trace: + [ 692.092089] + [ 692.092101] dump_stack+0x9c/0xcf + [ 692.092132] print_address_description.constprop.0+0x18/0x130 + [ 692.092164] ? find_first_bit+0x28/0x50 + [ 692.092185] kasan_report.cold+0x7f/0x111 + [ 692.092213] ? static_obj+0x10/0x80 + [ 692.092234] ? find_first_bit+0x28/0x50 + [ 692.092262] find_first_bit+0x28/0x50 + [ 692.092288] adf_msix_isr_ae+0x16e/0x230 [intel_qat] + +Fixes: ed8ccaef52fa ("crypto: qat - Add support for SRIOV") +Signed-off-by: Giovanni Cabiddu +Reviewed-by: Marco Chiappero +Reviewed-by: Fiona Trahe +Signed-off-by: Herbert Xu +Signed-off-by: Oliver Neukum +--- + drivers/crypto/qat/qat_common/adf_isr.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c +index e3ad5587be49..daab02011717 100644 +--- a/drivers/crypto/qat/qat_common/adf_isr.c ++++ b/drivers/crypto/qat/qat_common/adf_isr.c +@@ -15,6 +15,8 @@ + #include "adf_transport_access_macros.h" + #include "adf_transport_internal.h" + ++#define ADF_MAX_NUM_VFS 32 ++ + static int adf_enable_msix(struct adf_accel_dev *accel_dev) + { + struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev; +@@ -72,7 +74,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) + struct adf_bar *pmisc = + &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; + void __iomem *pmisc_bar_addr = pmisc->virt_addr; +- u32 vf_mask; ++ unsigned long vf_mask; + + /* Get the interrupt sources triggered by VFs */ + vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) & +@@ -93,8 +95,7 @@ static irqreturn_t adf_msix_isr_ae(int 
irq, void *dev_ptr) + * unless the VF is malicious and is attempting to + * flood the host OS with VF2PF interrupts. + */ +- for_each_set_bit(i, (const unsigned long *)&vf_mask, +- (sizeof(vf_mask) * BITS_PER_BYTE)) { ++ for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) { + vf_info = accel_dev->pf.vf_info + i; + + if (!__ratelimit(&vf_info->vf2pf_ratelimit)) { +-- +2.26.2 + diff --git a/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch b/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch new file mode 100644 index 0000000..3dbaf77 --- /dev/null +++ b/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch @@ -0,0 +1,47 @@ +From: Vasily Averin +Subject: ipc: remove memcg accounting for sops objects in do_semtimedop() +Date: Sat, 11 Sep 2021 10:40:08 +0300 +Message-ID: <90e254df-0dfe-f080-011e-b7c53ee7fd20@virtuozzo.com> +Patch-mainline: Not yet, too soon +References: bsc#1190115 + +Linus proposes to revert an accounting for sops objects in +do_semtimedop() because it's really just a temporary buffer +for a single semtimedop() system call. + +This object can consume up to 2 pages, syscall is sleeping one, +size and duration can be controlled by user, and this allocation +can be repeated by many thread at the same time. + +However Shakeel Butt pointed that there are much more popular objects +with the same life time and similar memory consumption, the accounting +of which was decided to be rejected for performance reasons. + +In addition, any usual task consumes much more accounted memory, +so 2 pages of this temporal buffer can be safely ignored. 
+ +Link: https://patchwork.kernel.org/project/linux-fsdevel/patch/20171005222144.123797-1-shakeelb@google.com/ + +Fixes: 18319498fdd4 ("memcg: enable accounting of ipc resources") +Signed-off-by: Vasily Averin +Acked-by: Michal Koutný +--- + ipc/sem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ipc/sem.c b/ipc/sem.c +index f833238df1ce..6693daf4fe11 100644 +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -2238,7 +2238,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, + if (nsops > ns->sc_semopm) + return -E2BIG; + if (nsops > SEMOPM_FAST) { +- sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL_ACCOUNT); ++ sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); + if (sops == NULL) + return -ENOMEM; + } +-- +2.25.1 + diff --git a/patches.suse/memcg-enable-accounting-for-file-lock-caches.patch b/patches.suse/memcg-enable-accounting-for-file-lock-caches.patch deleted file mode 100644 index 36fbe0f..0000000 --- a/patches.suse/memcg-enable-accounting-for-file-lock-caches.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: Vasily Averin -Date: Thu, 2 Sep 2021 14:55:19 -0700 -Subject: memcg: enable accounting for file lock caches -Git-commit: 0f12156dff2862ac54235fc72703f18770769042 -Patch-mainline: v5.15-rc1 -References: bsc#1190115 - -User can create file locks for each open file and force kernel to allocate -small but long-living objects per each open file. - -It makes sense to account for these objects to limit the host's memory -consumption from inside the memcg-limited container. - -Link: https://lkml.kernel.org/r/b009f4c7-f0ab-c0ec-8e83-918f47d677da@virtuozzo.com -Signed-off-by: Vasily Averin -Reviewed-by: Shakeel Butt -Cc: Alexander Viro -Cc: Alexey Dobriyan -Cc: Andrei Vagin -Cc: Borislav Petkov -Cc: Borislav Petkov -Cc: Christian Brauner -Cc: Dmitry Safonov <0x7f454c46@gmail.com> -Cc: "Eric W. Biederman" -Cc: Greg Kroah-Hartman -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: "J. 
Bruce Fields" -Cc: Jeff Layton -Cc: Jens Axboe -Cc: Jiri Slaby -Cc: Johannes Weiner -Cc: Kirill Tkhai -Cc: Michal Hocko -Cc: Oleg Nesterov -Cc: Roman Gushchin -Cc: Serge Hallyn -Cc: Tejun Heo -Cc: Thomas Gleixner -Cc: Vladimir Davydov -Cc: Yutian Yang -Cc: Zefan Li -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -[mkoutny: adjust context] -Acked-by: Michal Koutný ---- - fs/locks.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - ---- a/fs/locks.c -+++ b/fs/locks.c -@@ -2813,10 +2813,12 @@ static int __init filelock_init(void) - int i; - - flctx_cache = kmem_cache_create("file_lock_ctx", -- sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); -+ sizeof(struct file_lock_context), 0, -+ SLAB_PANIC | SLAB_ACCOUNT, NULL); - - filelock_cache = kmem_cache_create("file_lock_cache", -- sizeof(struct file_lock), 0, SLAB_PANIC, NULL); -+ sizeof(struct file_lock), 0, -+ SLAB_PANIC | SLAB_ACCOUNT, NULL); - - - for_each_possible_cpu(i) { diff --git a/patches.suse/mm-memory.c-do_fault-avoid-usage-of-stale-vm_area_st.patch b/patches.suse/mm-memory.c-do_fault-avoid-usage-of-stale-vm_area_st.patch new file mode 100644 index 0000000..67177ef --- /dev/null +++ b/patches.suse/mm-memory.c-do_fault-avoid-usage-of-stale-vm_area_st.patch @@ -0,0 +1,116 @@ +From fc8efd2ddfed3f343c11b693e87140ff358d7ff5 Mon Sep 17 00:00:00 2001 +From: Jan Stancek +Date: Tue, 5 Mar 2019 15:50:08 -0800 +Subject: [PATCH] mm/memory.c: do_fault: avoid usage of stale vm_area_struct +Git-commit: fc8efd2ddfed3f343c11b693e87140ff358d7ff5 +Patch-mainline: v5.1-rc1 +References: bsc#1136513 + +LTP testcase mtest06 [1] can trigger a crash on s390x running 5.0.0-rc8. 
+This is a stress test, where one thread mmaps/writes/munmaps memory area +and other thread is trying to read from it: + + CPU: 0 PID: 2611 Comm: mmap1 Not tainted 5.0.0-rc8+ #51 + Hardware name: IBM 2964 N63 400 (z/VM 6.4.0) + Krnl PSW : 0404e00180000000 00000000001ac8d8 (__lock_acquire+0x7/0x7a8) + Call Trace: + ([<0000000000000000>] (null)) + [<00000000001adae4>] lock_acquire+0xec/0x258 + [<000000000080d1ac>] _raw_spin_lock_bh+0x5c/0x98 + [<000000000012a780>] page_table_free+0x48/0x1a8 + [<00000000002f6e54>] do_fault+0xdc/0x670 + [<00000000002fadae>] __handle_mm_fault+0x416/0x5f0 + [<00000000002fb138>] handle_mm_fault+0x1b0/0x320 + [<00000000001248cc>] do_dat_exception+0x19c/0x2c8 + [<000000000080e5ee>] pgm_check_handler+0x19e/0x200 + +page_table_free() is called with NULL mm parameter, but because "0" is a +valid address on s390 (see S390_lowcore), it keeps going until it +eventually crashes in lockdep's lock_acquire. This crash is +reproducible at least since 4.14. + +Problem is that "vmf->vma" used in do_fault() can become stale. Because +mmap_sem may be released, other threads can come in, call munmap() and +cause "vma" be returned to kmem cache, and get zeroed/re-initialized and +re-used: + +handle_mm_fault | + __handle_mm_fault | + do_fault | + vma = vmf->vma | + do_read_fault | + __do_fault | + vma->vm_ops->fault(vmf); | + mmap_sem is released | + | + | do_munmap() + | remove_vma_list() + | remove_vma() + | vm_area_free() + | # vma is released + | ... + | # same vma is allocated + | # from kmem cache + | do_mmap() + | vm_area_alloc() + | memset(vma, 0, ...) + | + pte_free(vma->vm_mm, ...); | + page_table_free | + spin_lock_bh(&mm->context.lock);| + | + +Cache mm_struct to avoid using potentially stale "vma". 
+ +[1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/mtest06/mmap1.c + +Link: http://lkml.kernel.org/r/5b3fdf19e2a5be460a384b936f5b56e13733f1b8.1551595137.git.jstancek@redhat.com +Signed-off-by: Jan Stancek +Reviewed-by: Andrea Arcangeli +Reviewed-by: Matthew Wilcox +Acked-by: Rafael Aquini +Reviewed-by: Minchan Kim +Acked-by: Kirill A. Shutemov +Cc: Rik van Riel +Cc: Michal Hocko +Cc: Huang Ying +Cc: Souptick Joarder +Cc: Jerome Glisse +Cc: Aneesh Kumar K.V +Cc: David Hildenbrand +Cc: Andrea Arcangeli +Cc: David Rientjes +Cc: Mel Gorman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Oscar Salvador +--- + mm/memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3684,10 +3684,13 @@ static int do_shared_fault(struct vm_fau + * but allow concurrent faults). + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). ++ * If mmap_sem is released, vma may become invalid (for example ++ * by other thread calling munmap()). 
+ */ + static int do_fault(struct vm_fault *vmf) + { + struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *vm_mm = vma->vm_mm; + int ret; + + /* +@@ -3728,7 +3731,7 @@ static int do_fault(struct vm_fault *vmf + + /* preallocated pagetable is unused: free it */ + if (vmf->prealloc_pte) { +- pte_free(vma->vm_mm, vmf->prealloc_pte); ++ pte_free(vm_mm, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + return ret; diff --git a/patches.suse/scsi-sg-add-sg_remove_request-in-sg_write b/patches.suse/scsi-sg-add-sg_remove_request-in-sg_write index 94fb547..dcb2e7c 100644 --- a/patches.suse/scsi-sg-add-sg_remove_request-in-sg_write +++ b/patches.suse/scsi-sg-add-sg_remove_request-in-sg_write @@ -3,7 +3,7 @@ Date: Tue, 14 Apr 2020 10:13:28 +0800 Subject: scsi: sg: add sg_remove_request in sg_write Git-commit: 83c6f2390040f188cc25b270b4befeb5628c1aee Patch-mainline: v5.7-rc3 -References: bsc#1186635 +References: bsc#1171420 CVE-2020-12770 If the __copy_from_user function failed we need to call sg_remove_request in sg_write. diff --git a/patches.suse/time-Handle-negative-seconds-correctly-in-timespec64.patch b/patches.suse/time-Handle-negative-seconds-correctly-in-timespec64.patch new file mode 100644 index 0000000..4c9a3e0 --- /dev/null +++ b/patches.suse/time-Handle-negative-seconds-correctly-in-timespec64.patch @@ -0,0 +1,60 @@ +From 39ff83f2f6cc5cc1458dfcea9697f96338210beb Mon Sep 17 00:00:00 2001 +From: Lukas Hannen +Date: Wed, 25 Aug 2021 10:12:43 +0000 +Subject: [PATCH] time: Handle negative seconds correctly in timespec64_to_ns() +Git-commit: 39ff83f2f6cc5cc1458dfcea9697f96338210beb +References: git-fixes +Patch-mainline: v5.15-rc1 + +timespec64_ns() prevents multiplication overflows by comparing the seconds +value of the timespec to KTIME_SEC_MAX. If the value is greater or equal it +returns KTIME_MAX. 
+ +But that check casts the signed seconds value to unsigned which makes the +comparision true for all negative values and therefore return wrongly +KTIME_MAX. + +Negative second values are perfectly valid and required in some places, +e.g. ptp_clock_adjtime(). + +Remove the cast and add a check for the negative boundary which is required +to prevent undefined behaviour due to multiplication underflow. + +Fixes: cb47755725da ("time: Prevent undefined behaviour in timespec64_to_ns()")' +Signed-off-by: Lukas Hannen +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/AM6PR01MB541637BD6F336B8FFB72AF80EEC69@AM6PR01MB5416.eurprd01.prod.exchangelabs.com +Signed-off-by: Oliver Neukum +--- + include/linux/time64.h | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/include/linux/time64.h ++++ b/include/linux/time64.h +@@ -39,7 +39,9 @@ struct itimerspec64 { + /* Located here for timespec[64]_valid_strict */ + #define TIME64_MAX ((s64)~((u64)1 << 63)) + #define KTIME_MAX ((s64)~((u64)1 << 63)) ++#define KTIME_MIN (-KTIME_MAX - 1) + #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) ++#define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC) + + #if __BITS_PER_LONG == 64 + +@@ -188,10 +190,13 @@ static inline bool timespec64_valid_stri + */ + static inline s64 timespec64_to_ns(const struct timespec64 *ts) + { +- /* Prevent multiplication overflow */ +- if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) ++ /* Prevent multiplication overflow / underflow */ ++ if (ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + ++ if (ts->tv_sec <= KTIME_SEC_MIN) ++ return KTIME_MIN; ++ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; + } + diff --git a/scripts/git_sort/git_sort.py b/scripts/git_sort/git_sort.py index ba55672..d2e1da0 100755 --- a/scripts/git_sort/git_sort.py +++ b/scripts/git_sort/git_sort.py @@ -222,7 +222,7 @@ remotes = ( Head(RepoURL("mkp/scsi.git"), "fixes"), 
Head(RepoURL("git://git.kernel.dk/linux-block.git"), "for-next"), Head(RepoURL("git://git.kernel.org/pub/scm/virt/kvm/kvm.git"), "queue"), - Head(RepoURL("git://git.infradead.org/nvme.git"), "nvme-5.13"), + Head(RepoURL("git://git.infradead.org/nvme.git"), "nvme-5.15"), Head(RepoURL("dhowells/linux-fs.git")), Head(RepoURL("herbert/cryptodev-2.6.git")), Head(RepoURL("helgaas/pci.git"), "next"), diff --git a/scripts/patch-tag-template b/scripts/patch-tag-template index 3535523..4551bc3 100644 --- a/scripts/patch-tag-template +++ b/scripts/patch-tag-template @@ -1,7 +1,6 @@ # Here all the valid metadata tags for suse patches are recorded. Patches # in the SUSE kernel repository must have a Subject tag, and there must -# be a suse or novell email address in a From:, Signed-off-by: or Acked-by: -# tag. +# be a suse email address in a From:, Signed-off-by: or Acked-by: tag. # # Every patch in the SUSE repository must have a descriptive comment in # addition to any tags present diff --git a/scripts/python/suse_git/header.py b/scripts/python/suse_git/header.py index 262dfc8..c12697d 100755 --- a/scripts/python/suse_git/header.py +++ b/scripts/python/suse_git/header.py @@ -108,7 +108,7 @@ tag_map = { 'accepted' : [ { 'name' : 'SUSE', - 'match' : '.*@(suse\.(com|de|cz)|novell.com)', + 'match' : '.*@suse\.(com|de|cz)', }, { 'match' : '.*', @@ -120,7 +120,7 @@ tag_map = { 'accepted' : [ { 'name' : 'SUSE', - 'match' : '.*@(suse\.(com|de|cz)|novell.com)', + 'match' : '.*@suse\.(com|de|cz)', }, { 'match' : '.*', @@ -132,7 +132,7 @@ tag_map = { 'accepted' : [ { 'name' : 'SUSE', - 'match' : '.*@(suse\.(com|de|cz)|novell.com)', + 'match' : '.*@suse\.(com|de|cz)', }, { 'match' : '.*', @@ -145,7 +145,7 @@ tag_map = { 'accepted' : [ { 'name' : 'SUSE', - 'match' : '.*@(suse\.(com|de|cz)|novell.com)', + 'match' : '.*@suse\.(com|de|cz)', }, { 'match' : '.*', diff --git a/scripts/run_oldconfig.sh b/scripts/run_oldconfig.sh index d939568..429bf0a 100755 --- a/scripts/run_oldconfig.sh +++ 
b/scripts/run_oldconfig.sh @@ -424,6 +424,7 @@ for config in $config_files; do if [ -d scripts/dummy-tools ] ; then MAKE_ARGS="$MAKE_ARGS CROSS_COMPILE=scripts/dummy-tools/" chmod 755 scripts/dummy-tools/* + chmod 755 scripts/* fi if $silent; then MAKE_ARGS="$MAKE_ARGS -s" diff --git a/series.conf b/series.conf index 0c2e589..5f11e16 100644 --- a/series.conf +++ b/series.conf @@ -20849,6 +20849,7 @@ patches.suse/0045-crypto-inside-secure-make-function-safexcel_try_push.patch patches.suse/crypto-aesni-handle-zero-length-dst-buffer patches.suse/crypto-sha3-generic-fixes-for-alignment-and-big-endi + patches.suse/crypto-picoxcell-Fix-error-handling-in-spacc_probe.patch patches.suse/net-thunderx-Set-max-queue-count-taking-XDP_TX-into-.patch patches.suse/netxen-remove-timespec-usage.patch patches.suse/vmxnet3-increase-default-rx-ring-sizes.patch @@ -47192,6 +47193,7 @@ patches.suse/0003-ocfs2-fix-the-application-IO-timeout-when-fstrim-is-.patch patches.suse/mm-replace-all-open-encodings-for-NUMA_NO_NODE.patch patches.suse/mm-vmalloc-fix-size-check-for-remap_vmalloc_range_partial + patches.suse/mm-memory.c-do_fault-avoid-usage-of-stale-vm_area_st.patch patches.suse/intel_idle-add-support-for-Jacobsville.patch patches.suse/powercap-intel_rapl-add-support-for-Jacobsville.patch patches.suse/powercap-intel_rapl-add-Ice-Lake-mobile.patch @@ -57246,6 +57248,7 @@ patches.suse/crypto-algif_skcipher-EBUSY-on-aio-should-be-an-erro.patch patches.suse/crypto-mediatek-Fix-wrong-return-value-in-mtk_desc_r.patch patches.suse/crypto-ixp4xx-Fix-the-size-used-in-a-dma_free_cohere.patch + patches.suse/crypto-picoxcell-Fix-potential-race-condition-bug.patch patches.suse/crypto-omap-sham-fix-digcnt-register-handling-with-e.patch patches.suse/cypto-mediatek-fix-leaks-in-mtk_desc_ring_alloc.patch patches.suse/crypto-ccp-fix-error-handling.patch @@ -59185,6 +59188,7 @@ patches.suse/thermal-drivers-ti-soc-thermal-bandgap-Remove-unused.patch 
patches.suse/PCI-Release-OF-node-in-pci_scan_device-s-error-path.patch patches.suse/ACPI-hotplug-PCI-Fix-reference-count-leak-in-enable_.patch + patches.suse/PCI-endpoint-Fix-missing-destroy_workqueue.patch patches.suse/ACPI-custom_method-fix-potential-use-after-free-issu.patch patches.suse/ACPI-custom_method-fix-a-possible-memory-leak.patch patches.suse/ftrace-handle-commands-when-closing-set_ftrace_filter-file.patch @@ -59681,6 +59685,8 @@ patches.suse/cfg80211-Fix-possible-memory-leak-in-function-cfg802.patch patches.suse/can-raw-raw_setsockopt-fix-raw_rcv-panic-for-sock-UA.patch patches.suse/nfc-nfcsim-fix-use-after-free-during-module-unload.patch + patches.suse/bpf-Introduce-BPF-nospec-instruction-for-mitigating-.patch + patches.suse/bpf-Fix-leakage-due-to-insufficient-speculative-stor.patch patches.suse/can-hi311x-fix-a-signedness-bug-in-hi3110_cmd.patch patches.suse/can-mcba_usb_start-add-missing-urb-transfer_dma-init.patch patches.suse/can-usb_8dev-fix-memory-leak.patch @@ -59713,12 +59719,14 @@ patches.suse/PCI-MSI-Do-not-set-invalid-bits-in-MSI-mask.patch patches.suse/PCI-MSI-Correct-misleading-comments.patch patches.suse/PCI-MSI-Use-msi_mask_irq-in-pci_msi_shutdown.patch + patches.suse/RDMA-efa-Free-IRQ-vectors-on-error-flow.patch patches.suse/can-usb-esd_usb2-esd_usb2_rx_event-fix-the-interchan.patch patches.suse/Revert-USB-serial-ch341-fix-character-loss-at-high-t.patch patches.suse/vt_kdsetmode-extend-console-locking.patch patches.suse/spi-spi-fsl-dspi-Fix-issue-with-uninitialized-dma_sl.patch patches.suse/spi-spi-pic32-Fix-issue-with-uninitialized-dma_slave.patch patches.suse/power-supply-max17042-handle-fails-of-reading-status.patch + patches.suse/crypto-qat-use-proper-type-for-vf_mask.patch patches.suse/PCI-PM-Enable-PME-if-it-can-be-signaled-from-D3cold.patch patches.suse/mmc-dw_mmc-Fix-issue-with-uninitialized-dma_slave_co.patch patches.suse/mmc-moxart-Fix-issue-with-uninitialized-dma_slave_co.patch @@ -59728,6 +59736,7 @@ 
patches.suse/i2c-mt65xx-fix-IRQ-check.patch patches.suse/Bluetooth-sco-prevent-information-leak-in-sco_conn_d.patch patches.suse/Bluetooth-increase-BTNAMSIZ-to-21-chars-to-fix-poten.patch + patches.suse/Bluetooth-schedule-SCO-timeouts-with-delayed_work.patch patches.suse/Bluetooth-avoid-circular-locks-in-sco_sock_connect.patch patches.suse/Bluetooth-switch-to-lock_sock-in-SCO.patch patches.suse/Bluetooth-fix-repeated-calls-to-sco_sock_kill.patch @@ -59737,6 +59746,7 @@ patches.suse/ath6kl-wmi-fix-an-error-code-in-ath6kl_wmi_sync_poin.patch patches.suse/bcma-Fix-memory-leak-for-internally-handled-cores.patch patches.suse/media-go7007-fix-memory-leak-in-go7007_usb_probe.patch + patches.suse/RDMA-efa-Remove-double-QP-type-assignment.patch patches.suse/scsi-qla2xxx-Remove-redundant-continue-statement-in-.patch patches.suse/scsi-lpfc-Fix-NVMe-support-reporting-in-log-message.patch patches.suse/scsi-lpfc-Remove-use-of-kmalloc-in-trace-event-loggi.patch @@ -59784,9 +59794,11 @@ patches.suse/scsi-qla2xxx-Remove-redundant-initialization-of-vari.patch patches.suse/scsi-ibmvfc-Do-not-wait-for-initial-device-scan.patch patches.suse/ocfs2-ocfs2_downconvert_lock-failure-results-in-dead.patch - patches.suse/memcg-enable-accounting-for-file-lock-caches.patch patches.suse/memcg-enable-accounting-of-ipc-resources.patch patches.suse/mm-vmscan-guarantee-drop_slab_node-termination.patch + patches.suse/SUNRPC-Simplify-socket-shutdown-when-not-reusing-TCP.patch + patches.suse/SUNRPC-improve-error-response-to-over-size-gss-crede.patch + patches.suse/time-Handle-negative-seconds-correctly-in-timespec64.patch # dhowells/linux-fs keys-uefi patches.suse/0001-KEYS-Allow-unrestricted-boot-time-addition-of-keys-t.patch @@ -59816,6 +59828,7 @@ patches.suse/blk-mq-move-_blk_mq_update_nr_hw_queues-synchronize_rcu-call patches.suse/proc-Avoid-mixing-integer-types-in-mem_rw.patch patches.suse/scsi-smartpqi-create-module-parameters-for-LUN-reset.patch + 
patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch ######################################################## # end of sorted patches @@ -60724,6 +60737,7 @@ patches.kabi/kabi-fix-nvme_wait_freeze_timeout-return-type.patch patches.kabi/kabi-fix-after-kvm-vcpu-id-array-fix.patch patches.kabi/kabi-mask-changes-to-vhost_dev_init-and-struct-vhost.patch + patches.kabi/kabi-fix-bpf_insn_aux_data-revert-sanitize_stack_spill.patch ######################################################## # You'd better have a good reason for adding a patch