Blob Blame History Raw
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 22 Oct 2019 15:57:23 +0200
Subject: bpf: Fix use after free in subprog's jited symbol removal
Patch-mainline: v5.4-rc6
Git-commit: cd7455f1013ef96d5cbf5c05d2b7c06f273810a6
References: bsc#1109837

syzkaller managed to trigger the following crash:

  [...]
  BUG: unable to handle page fault for address: ffffc90001923030
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD aa551067 P4D aa551067 PUD aa552067 PMD a572b067 PTE 80000000a1173163
  Oops: 0000 [#1] PREEMPT SMP KASAN
  CPU: 0 PID: 7982 Comm: syz-executor912 Not tainted 5.4.0-rc3+ #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:bpf_jit_binary_hdr include/linux/filter.h:787 [inline]
  RIP: 0010:bpf_get_prog_addr_region kernel/bpf/core.c:531 [inline]
  RIP: 0010:bpf_tree_comp kernel/bpf/core.c:600 [inline]
  RIP: 0010:__lt_find include/linux/rbtree_latch.h:115 [inline]
  RIP: 0010:latch_tree_find include/linux/rbtree_latch.h:208 [inline]
  RIP: 0010:bpf_prog_kallsyms_find kernel/bpf/core.c:674 [inline]
  RIP: 0010:is_bpf_text_address+0x184/0x3b0 kernel/bpf/core.c:709
  [...]
  Call Trace:
   kernel_text_address kernel/extable.c:147 [inline]
   __kernel_text_address+0x9a/0x110 kernel/extable.c:102
   unwind_get_return_address+0x4c/0x90 arch/x86/kernel/unwind_frame.c:19
   arch_stack_walk+0x98/0xe0 arch/x86/kernel/stacktrace.c:26
   stack_trace_save+0xb6/0x150 kernel/stacktrace.c:123
   save_stack mm/kasan/common.c:69 [inline]
   set_track mm/kasan/common.c:77 [inline]
   __kasan_kmalloc+0x11c/0x1b0 mm/kasan/common.c:510
   kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:518
   slab_post_alloc_hook mm/slab.h:584 [inline]
   slab_alloc mm/slab.c:3319 [inline]
   kmem_cache_alloc+0x1f5/0x2e0 mm/slab.c:3483
   getname_flags+0xba/0x640 fs/namei.c:138
   getname+0x19/0x20 fs/namei.c:209
   do_sys_open+0x261/0x560 fs/open.c:1091
   __do_sys_open fs/open.c:1115 [inline]
   __se_sys_open fs/open.c:1110 [inline]
   __x64_sys_open+0x87/0x90 fs/open.c:1110
   do_syscall_64+0xf7/0x1c0 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe
  [...]

After further debugging it turns out that we walk kallsyms while in parallel
we tear down a BPF program which contains subprograms that have been JITed
though the program itself has not been fully exposed and is eventually bailing
out with error.

The bpf_prog_kallsyms_del_subprogs() in bpf_prog_load()'s error path removes
the symbols, however, bpf_prog_free() tears down the JIT memory too early via
scheduled work. Instead, it needs to properly respect RCU grace period as the
kallsyms walk for BPF is under RCU.

Fix it by refactoring __bpf_prog_put()'s tear down and reuse it in our error
path where we defer final destruction when we have subprogs in the program.

Fixes: 7d1982b4e335 ("bpf: fix panic in prog load calls cleanup")
Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs")
Reported-by: syzbot+710043c5d1d5b5013bc7@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: syzbot+710043c5d1d5b5013bc7@syzkaller.appspotmail.com
Link: https://lore.kernel.org/bpf/55f6367324c2d7e9583fa9ccf5385dcbba0d7a6e.1571752452.git.daniel@iogearbox.net
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 include/linux/filter.h |    1 -
 kernel/bpf/core.c      |    2 +-
 kernel/bpf/syscall.c   |   22 +++++++++++++++++-----
 3 files changed, 18 insertions(+), 7 deletions(-)

--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -973,7 +973,6 @@ static inline void bpf_prog_kallsyms_del
 }
 #endif /* CONFIG_BPF_JIT */
 
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
 
 #define BPF_ANC		BIT(15)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -355,7 +355,7 @@ struct bpf_prog *bpf_patch_insn_single(s
 	return prog_adj;
 }
 
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
 
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1076,14 +1076,22 @@ static void __bpf_prog_put_rcu(struct rc
 	bpf_prog_free(aux->prog);
 }
 
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
+{
+	bpf_prog_kallsyms_del_all(prog);
+
+	if (deferred)
+		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+	else
+		__bpf_prog_put_rcu(&prog->aux->rcu);
+}
+
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
-		bpf_prog_kallsyms_del_all(prog);
-
-		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+		__bpf_prog_put_noref(prog, true);
 	}
 }
 
@@ -1419,8 +1427,12 @@ static int bpf_prog_load(union bpf_attr
 	return err;
 
 free_used_maps:
-	bpf_prog_kallsyms_del_subprogs(prog);
-	free_used_maps(prog->aux);
+	/* In case we have subprogs, we need to wait for a grace
+	 * period before we can tear down JIT memory since symbols
+	 * are already exposed under kallsyms.
+	 */
+	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
+	return err;
 free_prog:
 	bpf_prog_uncharge_memlock(prog);
 free_prog_nouncharge: