diff --git a/patches.suse/memcg-kmem-do-not-fail-__GFP_NOFAIL-charges.patch b/patches.suse/memcg-kmem-do-not-fail-__GFP_NOFAIL-charges.patch new file mode 100644 index 0000000..3e7e950 --- /dev/null +++ b/patches.suse/memcg-kmem-do-not-fail-__GFP_NOFAIL-charges.patch @@ -0,0 +1,87 @@ +From: Michal Hocko +Date: Wed, 25 Sep 2019 16:45:53 -0700 +Subject: memcg, kmem: do not fail __GFP_NOFAIL charges +Git-commit: e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 +Patch-mainline: v5.4-rc1 +References: bsc#1204755 + +Thomas has noticed the following NULL ptr dereference when using cgroup +v1 kmem limit: +BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 +PGD 0 +P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 3 PID: 16923 Comm: gtk-update-icon Not tainted 4.19.51 #42 +Hardware name: Gigabyte Technology Co., Ltd. Z97X-Gaming G1/Z97X-Gaming G1, BIOS F9 07/31/2015 +RIP: 0010:create_empty_buffers+0x24/0x100 +Code: cd 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 d4 ba 01 00 00 00 55 53 48 89 fb e8 97 fe ff ff 48 89 c5 48 89 c2 eb 03 48 89 ca <48> 8b 4a 08 4c 09 22 48 85 c9 75 f1 48 89 6a 08 48 8b 43 18 48 8d +RSP: 0018:ffff927ac1b37bf8 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: fffff2d4429fd740 RCX: 0000000100097149 +RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff9075a99fbe00 +RBP: 0000000000000000 R08: fffff2d440949cc8 R09: 00000000000960c0 +R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff907601f18360 R14: 0000000000002000 R15: 0000000000001000 +FS: 00007fb55b288bc0(0000) GS:ffff90761f8c0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000008 CR3: 000000007aebc002 CR4: 00000000001606e0 +Call Trace: + create_page_buffers+0x4d/0x60 + __block_write_begin_int+0x8e/0x5a0 + ? ext4_inode_attach_jinode.part.82+0xb0/0xb0 + ? jbd2__journal_start+0xd7/0x1f0 + ext4_da_write_begin+0x112/0x3d0 + generic_perform_write+0xf1/0x1b0 + ? file_update_time+0x70/0x140 + __generic_file_write_iter+0x141/0x1a0 + ext4_file_write_iter+0xef/0x3b0 + __vfs_write+0x17e/0x1e0 + vfs_write+0xa5/0x1a0 + ksys_write+0x57/0xd0 + do_syscall_64+0x55/0x160 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Tetsuo then noticed that this is because the __memcg_kmem_charge_memcg +fails __GFP_NOFAIL charge when the kmem limit is reached. This is a wrong +behavior because nofail allocations are not allowed to fail. Normal +charge path simply forces the charge even if that means to cross the +limit. Kmem accounting should be doing the same. + +Link: http://lkml.kernel.org/r/20190906125608.32129-1-mhocko@kernel.org +Signed-off-by: Michal Hocko +Reported-by: Thomas Lindroth +Debugged-by: Tetsuo Handa +Cc: Johannes Weiner +Cc: Vladimir Davydov +Cc: Andrey Ryabinin +Cc: Thomas Lindroth +Cc: Shakeel Butt +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Acked-by: Michal Koutný +--- + mm/memcontrol.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 2156ef775d04..c313c49074ca 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2943,6 +2943,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) { ++ ++ /* ++ * Enforce __GFP_NOFAIL allocation because callers are not ++ * prepared to see failures and likely do not have any failure ++ * handling code. ++ */ ++ if (gfp & __GFP_NOFAIL) { ++ page_counter_charge(&memcg->kmem, nr_pages); ++ return 0; ++ } + cancel_charge(memcg, nr_pages); + return -ENOMEM; + } + diff --git a/series.conf b/series.conf index 2a20825..6defe8d 100644 --- a/series.conf +++ b/series.conf @@ -52671,6 +52671,7 @@ patches.suse/ceph-fix-directories-inode-i_blkbits-initialization.patch patches.suse/ceph-update-the-mtime-when-truncating-up.patch patches.suse/ceph-reconnect-connection-if-session-hang-in-opening-state.patch + patches.suse/memcg-kmem-do-not-fail-__GFP_NOFAIL-charges.patch patches.suse/kexec-bail-out-upon-sigkill-when-allocating-memory patches.suse/0001-xen-pci-reserve-MCFG-areas-earlier.patch patches.suse/s390-topology-avoid-firing-events-before-kobjs-are-created