|
Gary Lin |
be700f |
From: Daniel Borkmann <daniel@iogearbox.net>
|
|
Gary Lin |
be700f |
Date: Tue, 9 Feb 2021 18:46:10 +0000
|
|
Gary Lin |
be700f |
Subject: bpf: Fix 32 bit src register truncation on div/mod
|
|
Gary Lin |
be700f |
Patch-mainline: v5.11
|
|
Gary Lin |
be700f |
Git-commit: e88b2c6e5a4d9ce30d75391e4d950da74bb2bd90
|
|
Gary Lin |
be700f |
References: bsc#1184170
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
While reviewing a different fix, John and I noticed an oddity in one of the
|
|
Gary Lin |
be700f |
BPF program dumps that stood out, for example:
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
# bpftool p d x i 13
|
|
Gary Lin |
be700f |
0: (b7) r0 = 808464450
|
|
Gary Lin |
be700f |
1: (b4) w4 = 808464432
|
|
Gary Lin |
be700f |
2: (bc) w0 = w0
|
|
Gary Lin |
be700f |
3: (15) if r0 == 0x0 goto pc+1
|
|
Gary Lin |
be700f |
4: (9c) w4 %= w0
|
|
Gary Lin |
be700f |
[...]
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
In line 2 we noticed that the mov32 would 32 bit truncate the original src
|
|
Gary Lin |
be700f |
register for the div/mod operation. While for the two operations the dst
|
|
Gary Lin |
be700f |
register is typically marked unknown e.g. from adjust_scalar_min_max_vals()
|
|
Gary Lin |
be700f |
the src register is not, and thus verifier keeps tracking original bounds,
|
|
Gary Lin |
be700f |
simplified:
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
0: R1=ctx(id=0,off=0,imm=0) R10=fp0
|
|
Gary Lin |
be700f |
0: (b7) r0 = -1
|
|
Gary Lin |
be700f |
1: R0_w=invP-1 R1=ctx(id=0,off=0,imm=0) R10=fp0
|
|
Gary Lin |
be700f |
1: (b7) r1 = -1
|
|
Gary Lin |
be700f |
2: R0_w=invP-1 R1_w=invP-1 R10=fp0
|
|
Gary Lin |
be700f |
2: (3c) w0 /= w1
|
|
Gary Lin |
be700f |
3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP-1 R10=fp0
|
|
Gary Lin |
be700f |
3: (77) r1 >>= 32
|
|
Gary Lin |
be700f |
4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP4294967295 R10=fp0
|
|
Gary Lin |
be700f |
4: (bf) r0 = r1
|
|
Gary Lin |
be700f |
5: R0_w=invP4294967295 R1_w=invP4294967295 R10=fp0
|
|
Gary Lin |
be700f |
5: (95) exit
|
|
Gary Lin |
be700f |
processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
Runtime result of r0 at exit is 0 instead of expected -1. Remove the
|
|
Gary Lin |
be700f |
verifier mov32 src rewrite in div/mod and replace it with a jmp32 test
|
|
Gary Lin |
be700f |
instead. After the fix, we result in the following code generation when
|
|
Gary Lin |
be700f |
having dividend r1 and divisor r6:
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
div, 64 bit: div, 32 bit:
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
0: (b7) r6 = 8 0: (b7) r6 = 8
|
|
Gary Lin |
be700f |
1: (b7) r1 = 8 1: (b7) r1 = 8
|
|
Gary Lin |
be700f |
2: (55) if r6 != 0x0 goto pc+2 2: (56) if w6 != 0x0 goto pc+2
|
|
Gary Lin |
be700f |
3: (ac) w1 ^= w1 3: (ac) w1 ^= w1
|
|
Gary Lin |
be700f |
4: (05) goto pc+1 4: (05) goto pc+1
|
|
Gary Lin |
be700f |
5: (3f) r1 /= r6 5: (3c) w1 /= w6
|
|
Gary Lin |
be700f |
6: (b7) r0 = 0 6: (b7) r0 = 0
|
|
Gary Lin |
be700f |
7: (95) exit 7: (95) exit
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
mod, 64 bit: mod, 32 bit:
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
0: (b7) r6 = 8 0: (b7) r6 = 8
|
|
Gary Lin |
be700f |
1: (b7) r1 = 8 1: (b7) r1 = 8
|
|
Gary Lin |
be700f |
2: (15) if r6 == 0x0 goto pc+1 2: (16) if w6 == 0x0 goto pc+1
|
|
Gary Lin |
be700f |
3: (9f) r1 %= r6 3: (9c) w1 %= w6
|
|
Gary Lin |
be700f |
4: (b7) r0 = 0 4: (b7) r0 = 0
|
|
Gary Lin |
be700f |
5: (95) exit 5: (95) exit
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
x86 in particular can throw a 'divide error' exception for div
|
|
Gary Lin |
be700f |
instruction not only for divisor being zero, but also for the case
|
|
Gary Lin |
be700f |
when the quotient is too large for the designated register. For the
|
|
Gary Lin |
be700f |
edx:eax and rdx:rax dividend pair it is not an issue in x86 BPF JIT
|
|
Gary Lin |
be700f |
since we always zero edx (rdx). Hence really the only protection
|
|
Gary Lin |
be700f |
needed is against divisor being zero.
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
Fixes: 68fda450a7df ("bpf: fix 32-bit divide by zero")
|
|
Gary Lin |
be700f |
Co-developed-by: John Fastabend <john.fastabend@gmail.com>
|
|
Gary Lin |
be700f |
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
|
|
Gary Lin |
be700f |
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
|
|
Gary Lin |
be700f |
Acked-by: Alexei Starovoitov <ast@kernel.org>
|
|
Gary Lin |
be700f |
Acked-by: Gary Lin <glin@suse.com>
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
NOTE from Gary:
|
|
Gary Lin |
be700f |
* 'BPF_JMP32' in the original commit is removed since the instruction is
|
|
Gary Lin |
be700f |
not defined in this branch.
|
|
Gary Lin |
be700f |
* 'is64' is removed since there is no user of the variable after removing
|
|
Gary Lin |
be700f |
the diff for 'BPF_JMP32'.
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
---
|
|
Gary Lin |
be700f |
kernel/bpf/verifier.c | 29 +++++++++++++----------------
|
|
Gary Lin |
be700f |
1 file changed, 13 insertions(+), 16 deletions(-)
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
--- a/kernel/bpf/verifier.c
|
|
Gary Lin |
be700f |
+++ b/kernel/bpf/verifier.c
|
|
Gary Lin |
be700f |
@@ -4628,31 +4628,28 @@ static int fixup_bpf_calls(struct bpf_ve
|
|
Gary Lin |
be700f |
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
|
|
Gary Lin |
be700f |
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
|
|
Gary Lin |
be700f |
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
|
|
Gary Lin |
be700f |
- bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
|
|
Gary Lin |
be700f |
- struct bpf_insn mask_and_div[] = {
|
|
Gary Lin |
be700f |
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
|
|
Gary Lin |
be700f |
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
|
|
Gary Lin |
be700f |
+ struct bpf_insn *patchlet;
|
|
Gary Lin |
be700f |
+ struct bpf_insn chk_and_div[] = {
|
|
Gary Lin |
be700f |
/* Rx div 0 -> 0 */
|
|
Gary Lin |
be700f |
- BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
|
|
Gary Lin |
be700f |
+ BPF_RAW_INSN(BPF_JMP |
|
|
Gary Lin |
be700f |
+ BPF_JNE | BPF_K, insn->src_reg,
|
|
Gary Lin |
be700f |
+ 0, 2, 0),
|
|
Gary Lin |
be700f |
BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
|
|
Gary Lin |
be700f |
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
|
|
Gary Lin |
be700f |
*insn,
|
|
Gary Lin |
be700f |
};
|
|
Gary Lin |
be700f |
- struct bpf_insn mask_and_mod[] = {
|
|
Gary Lin |
be700f |
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
|
|
Gary Lin |
be700f |
+ struct bpf_insn chk_and_mod[] = {
|
|
Gary Lin |
be700f |
/* Rx mod 0 -> Rx */
|
|
Gary Lin |
be700f |
- BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
|
|
Gary Lin |
be700f |
+ BPF_RAW_INSN(BPF_JMP |
|
|
Gary Lin |
be700f |
+ BPF_JEQ | BPF_K, insn->src_reg,
|
|
Gary Lin |
be700f |
+ 0, 1, 0),
|
|
Gary Lin |
be700f |
*insn,
|
|
Gary Lin |
be700f |
};
|
|
Gary Lin |
be700f |
- struct bpf_insn *patchlet;
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
- if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
|
|
Gary Lin |
be700f |
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
|
|
Gary Lin |
be700f |
- patchlet = mask_and_div + (is64 ? 1 : 0);
|
|
Gary Lin |
be700f |
- cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
|
|
Gary Lin |
be700f |
- } else {
|
|
Gary Lin |
be700f |
- patchlet = mask_and_mod + (is64 ? 1 : 0);
|
|
Gary Lin |
be700f |
- cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
|
|
Gary Lin |
be700f |
- }
|
|
Gary Lin |
be700f |
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
|
|
Gary Lin |
be700f |
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
|
|
Gary Lin |
be700f |
+ ARRAY_SIZE(chk_and_mod);
|
|
Gary Lin |
be700f |
|
|
Gary Lin |
be700f |
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
|
|
Gary Lin |
be700f |
if (!new_prog)
|