diff --git a/patches.suse/powerpc-Avoid-nmi_enter-nmi_exit-in-real-mode-interr.patch b/patches.suse/powerpc-Avoid-nmi_enter-nmi_exit-in-real-mode-interr.patch new file mode 100644 index 0000000..83c7b33 --- /dev/null +++ b/patches.suse/powerpc-Avoid-nmi_enter-nmi_exit-in-real-mode-interr.patch @@ -0,0 +1,116 @@ +From 0db880fc865ffb522141ced4bfa66c12ab1fbb70 Mon Sep 17 00:00:00 2001 +From: Mahesh Salgaonkar <mahesh@linux.ibm.com> +Date: Wed, 10 Apr 2024 10:00:06 +0530 +Subject: [PATCH] powerpc: Avoid nmi_enter/nmi_exit in real mode interrupt. + +References: bsc#1221645 ltc#205739 bsc#1223191 +Patch-mainline: queued +Git-repo: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git +Git-commit: 0db880fc865ffb522141ced4bfa66c12ab1fbb70 + +nmi_enter()/nmi_exit() touches per cpu variables which can lead to kernel +crash when invoked during real mode interrupt handling (e.g. early HMI/MCE +interrupt handler) if percpu allocation comes from vmalloc area. + +Early HMI/MCE handlers are called through DEFINE_INTERRUPT_HANDLER_NMI() +wrapper which invokes nmi_enter/nmi_exit calls. We don't see any issue when +percpu allocation is from the embedded first chunk. However with +CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there are chances where percpu +allocation can come from the vmalloc area. + +With kernel command line "percpu_alloc=page" we can force percpu allocation +to come from vmalloc area and can see kernel crash in machine_check_early: + +[ 1.215714] NIP [c000000000e49eb4] rcu_nmi_enter+0x24/0x110 +[ 1.215717] LR [c0000000000461a0] machine_check_early+0xf0/0x2c0 +[ 1.215719] --- interrupt: 200 +[ 1.215720] [c000000fffd73180] [0000000000000000] 0x0 (unreliable) +[ 1.215722] [c000000fffd731b0] [0000000000000000] 0x0 +[ 1.215724] [c000000fffd73210] [c000000000008364] machine_check_early_common+0x134/0x1f8 + +Fix this by avoiding use of nmi_enter()/nmi_exit() in real mode if percpu +first chunk is not embedded. 
+ +Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> +Tested-by: Shirisha Ganta <shirisha@linux.ibm.com> +Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com> +Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> +Link: https://msgid.link/20240410043006.81577-1-mahesh@linux.ibm.com +Acked-by: Michal Suchanek <msuchanek@suse.de> +--- + arch/powerpc/include/asm/interrupt.h | 10 ++++++++++ + arch/powerpc/include/asm/percpu.h | 10 ++++++++++ + arch/powerpc/kernel/setup_64.c | 2 ++ + 3 files changed, 22 insertions(+) + +diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h +index 7b610864b364..2d6c886b40f4 100644 +--- a/arch/powerpc/include/asm/interrupt.h ++++ b/arch/powerpc/include/asm/interrupt.h +@@ -336,6 +336,14 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte + if (IS_ENABLED(CONFIG_KASAN)) + return; + ++ /* ++ * Likewise, do not use it in real mode if percpu first chunk is not ++ * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there ++ * are chances where percpu allocation can come from vmalloc area. ++ */ ++ if (percpu_first_chunk_is_paged) ++ return; ++ + /* Otherwise, it should be safe to call it */ + nmi_enter(); + } +@@ -351,6 +359,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter + // no nmi_exit for a pseries hash guest taking a real mode exception + } else if (IS_ENABLED(CONFIG_KASAN)) { + // no nmi_exit for KASAN in real mode ++ } else if (percpu_first_chunk_is_paged) { ++ // no nmi_exit if percpu first chunk is not embedded + } else { + nmi_exit(); + } +diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h +index 8e5b7d0b851c..634970ce13c6 100644 +--- a/arch/powerpc/include/asm/percpu.h ++++ b/arch/powerpc/include/asm/percpu.h +@@ -15,6 +15,16 @@ + #endif /* CONFIG_SMP */ + #endif /* __powerpc64__ */ + ++#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) && defined(CONFIG_SMP) ++#include <linux/jump_label.h> ++DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); ++ ++#define percpu_first_chunk_is_paged \ ++ 
(static_key_enabled(&__percpu_first_chunk_is_paged.key)) ++#else ++#define percpu_first_chunk_is_paged false ++#endif /* CONFIG_PPC64 && CONFIG_SMP */ ++ + #include <asm/paca.h> + + #include <asm-generic/percpu.h> +diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c +--- a/arch/powerpc/kernel/setup_64.c ++++ b/arch/powerpc/kernel/setup_64.c +@@ -834,6 +834,7 @@ static __init int pcpu_cpu_to_node(int cpu) + + unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + EXPORT_SYMBOL(__per_cpu_offset); ++DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); + + static void __init pcpu_populate_pte(unsigned long addr) + { +@@ -876,6 +877,7 @@ void __init setup_per_cpu_areas(void) + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); + ++ static_key_enable(&__percpu_first_chunk_is_paged.key); + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +-- +2.44.0 + diff --git a/series.conf b/series.conf index 498fd8e..6bc488a 100644 --- a/series.conf +++ b/series.conf @@ -46242,6 +46242,9 @@ patches.suse/0001-stackdepot-rename-pool_index-to-pool_index_plus_1.patch patches.suse/iommu-vt-d-Allocate-local-memory-for-page-request-qu.patch + # powerpc/linux next + patches.suse/powerpc-Avoid-nmi_enter-nmi_exit-in-real-mode-interr.patch + # out-of-tree patches patches.suse/cifs-Fix-UAF-in-cifs_demultiplex_thread-.patch patches.suse/raid1-fix-use-after-free-for-original-bio-in-raid1_w.patch