From: Kui-Feng Lee <kuifeng@fb.com>
Date: Tue, 10 May 2022 13:59:20 -0700
Subject: bpf, x86: Create bpf_tramp_run_ctx on the caller thread's stack
Patch-mainline: v5.19-rc1
Git-commit: e384c7b7b46d0a5f4bf3c554f963e6e9622d0ab1
References: jsc#PED-1377
X-info: context change in arch_prepare_bpf_trampoline() due to 589127105588 "x86/ibt,bpf: Add ENDBR instructions to prologue and trampoline" not backported
BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on
stacks and set/reset the current bpf_run_ctx before/after calling a
bpf_prog.
Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-3-kuifeng@fb.com
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
arch/x86/net/bpf_jit_comp.c | 41 ++++++++++++++++++++++++++++++++---------
include/linux/bpf.h | 17 +++++++++++++----
kernel/bpf/syscall.c | 7 +++++--
kernel/bpf/trampoline.c | 20 ++++++++++++++++----
4 files changed, 66 insertions(+), 19 deletions(-)
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1744,14 +1744,30 @@ static void restore_regs(const struct bt
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
- bool save_ret)
+ int run_ctx_off, bool save_ret)
{
u8 *prog = *pprog;
u8 *jmp_insn;
+ int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
struct bpf_prog *p = l->link.prog;
+ /* mov rdi, 0 */
+ emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
+
+ /* Prepare struct bpf_tramp_run_ctx.
+ *
+ * bpf_tramp_run_ctx is already preserved by
+ * arch_prepare_bpf_trampoline().
+ *
+ * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi
+ */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
+
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+ /* arg2: lea rsi, [rbp - ctx_cookie_off] */
+ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+
if (emit_call(&prog,
p->aux->sleepable ? __bpf_prog_enter_sleepable :
__bpf_prog_enter, prog))
@@ -1797,6 +1813,8 @@ static int invoke_bpf_prog(const struct
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ /* arg3: lea rdx, [rbp - run_ctx_off] */
+ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_call(&prog,
p->aux->sleepable ? __bpf_prog_exit_sleepable :
__bpf_prog_exit, prog))
@@ -1834,14 +1852,14 @@ static int emit_cond_near_jump(u8 **ppro
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- bool save_ret)
+ int run_ctx_off, bool save_ret)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
- save_ret))
+ run_ctx_off, save_ret))
return -EINVAL;
}
*pprog = prog;
@@ -1850,7 +1868,7 @@ static int invoke_bpf(const struct btf_f
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- u8 **branches)
+ int run_ctx_off, u8 **branches)
{
u8 *prog = *pprog;
int i;
@@ -1861,7 +1879,7 @@ static int invoke_bpf_mod_ret(const stru
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, true))
+ if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
return -EINVAL;
/* mod_ret prog stored return value into [rbp - 8]. Emit:
@@ -1967,7 +1985,7 @@ int arch_prepare_bpf_trampoline(struct b
void *orig_call)
{
int ret, i, nr_args = m->nr_args;
- int regs_off, ip_off, args_off, stack_size = nr_args * 8;
+ int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -1997,6 +2015,8 @@ int arch_prepare_bpf_trampoline(struct b
* RBP - args_off [ args count ] always
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
+ *
+ * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
*/
/* room for return value of orig_call or fentry prog */
@@ -2015,6 +2035,9 @@ int arch_prepare_bpf_trampoline(struct b
ip_off = stack_size;
+ stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
+ run_ctx_off = stack_size;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -2058,7 +2081,7 @@ int arch_prepare_bpf_trampoline(struct b
}
if (fentry->nr_links)
- if (invoke_bpf(m, &prog, fentry, regs_off,
+ if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET))
return -EINVAL;
@@ -2069,7 +2092,7 @@ int arch_prepare_bpf_trampoline(struct b
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
- branches)) {
+ run_ctx_off, branches)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2106,7 +2129,7 @@ int arch_prepare_bpf_trampoline(struct b
}
if (fexit->nr_links)
- if (invoke_bpf(m, &prog, fexit, regs_off, false)) {
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
ret = -EINVAL;
goto cleanup;
}
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -731,6 +731,8 @@ struct bpf_tramp_links {
int nr_links;
};
+struct bpf_tramp_run_ctx;
+
/* Different use cases for BPF trampoline:
* 1. replace nop at the function entry (kprobe equivalent)
* flags = BPF_TRAMP_F_RESTORE_REGS
@@ -757,10 +759,11 @@ int arch_prepare_bpf_trampoline(struct b
struct bpf_tramp_links *tlinks,
void *orig_call);
/* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
@@ -1352,6 +1355,12 @@ struct bpf_trace_run_ctx {
u64 bpf_cookie;
};
+struct bpf_tramp_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ u64 bpf_cookie;
+ struct bpf_run_ctx *saved_run_ctx;
+};
+
static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
struct bpf_run_ctx *old_ctx = NULL;
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5020,6 +5020,7 @@ static bool syscall_prog_is_valid_access
BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
struct bpf_prog * __maybe_unused prog;
+ struct bpf_tramp_run_ctx __maybe_unused run_ctx;
switch (cmd) {
case BPF_MAP_CREATE:
@@ -5047,13 +5048,15 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union
return -EINVAL;
}
- if (!__bpf_prog_enter_sleepable(prog)) {
+ run_ctx.bpf_cookie = 0;
+ run_ctx.saved_run_ctx = NULL;
+ if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
/* recursion detected */
bpf_prog_put(prog);
return -EBUSY;
}
attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
- __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
+ __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
bpf_prog_put(prog);
return 0;
#endif
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -580,11 +580,14 @@ static void notrace inc_misses_counter(s
* [2..MAX_U64] - execute bpf prog and record execution time.
* This is start time.
*/
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
migrate_disable();
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
inc_misses_counter(prog);
return 0;
@@ -614,29 +617,38 @@ static void notrace update_prog_stats(st
}
}
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
__releases(RCU)
{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
update_prog_stats(prog, start);
__this_cpu_dec(*(prog->active));
migrate_enable();
rcu_read_unlock();
}
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
{
rcu_read_lock_trace();
migrate_disable();
might_fault();
+
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
inc_misses_counter(prog);
return 0;
}
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
return bpf_prog_start_time();
}
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx)
{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
update_prog_stats(prog, start);
__this_cpu_dec(*(prog->active));
migrate_enable();