From: Jie Meng <jmeng@fb.com>
Date: Fri, 1 Oct 2021 20:56:26 -0700
Subject: bpf, x64: Save bytes for DIV by reducing reg copies
Patch-mainline: v5.16-rc1
Git-commit: 57a610f1c58fa493315e1c24eef6d992cdf4c4a9
References: jsc#PED-1377
Instead of unconditionally performing push/pop on %rax/%rdx in case of
division/modulo, we can save a few bytes in case of destination register
being either BPF r0 (%rax) or r3 (%rdx) since the result is written in
there anyway.
Also, we do not need to copy the source to %r11 unless the source is either
%rax, %rdx or an immediate.
For example, before the patch:
22: push %rax
23: push %rdx
24: mov %rsi,%r11
27: xor %edx,%edx
29: div %r11
2c: mov %rax,%r11
2f: pop %rdx
30: pop %rax
31: mov %r11,%rax
After:
22: push %rdx
23: xor %edx,%edx
25: div %rsi
28: pop %rdx
Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211002035626.2041910-1-jmeng@fb.com
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
arch/x86/net/bpf_jit_comp.c | 71 +++++++++++++++++------------
tools/testing/selftests/bpf/verifier/jit.c | 47 +++++++++++++++++++
2 files changed, 89 insertions(+), 29 deletions(-)
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1002,19 +1002,30 @@ static int do_jit(struct bpf_prog *bpf_p
case BPF_ALU64 | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_K:
- case BPF_ALU64 | BPF_DIV | BPF_K:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
-
- if (BPF_SRC(insn->code) == BPF_X)
- /* mov r11, src_reg */
- EMIT_mov(AUX_REG, src_reg);
- else
+ case BPF_ALU64 | BPF_DIV | BPF_K: {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
+
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (src_reg == BPF_REG_0 ||
+ src_reg == BPF_REG_3) {
+ /* mov r11, src_reg */
+ EMIT_mov(AUX_REG, src_reg);
+ src_reg = AUX_REG;
+ }
+ } else {
/* mov r11, imm32 */
EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
+ src_reg = AUX_REG;
+ }
- /* mov rax, dst_reg */
- EMIT_mov(BPF_REG_0, dst_reg);
+ if (dst_reg != BPF_REG_0)
+ /* mov rax, dst_reg */
+ emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
/*
* xor edx, edx
@@ -1022,26 +1033,28 @@ static int do_jit(struct bpf_prog *bpf_p
*/
EMIT2(0x31, 0xd2);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
- /* div r11 */
- EMIT3(0x49, 0xF7, 0xF3);
- else
- /* div r11d */
- EMIT3(0x41, 0xF7, 0xF3);
-
- if (BPF_OP(insn->code) == BPF_MOD)
- /* mov r11, rdx */
- EMIT3(0x49, 0x89, 0xD3);
- else
- /* mov r11, rax */
- EMIT3(0x49, 0x89, 0xC3);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (is64)
+ EMIT1(add_1mod(0x48, src_reg));
+ else if (is_ereg(src_reg))
+ EMIT1(add_1mod(0x40, src_reg));
+ /* div src_reg */
+ EMIT2(0xF7, add_1reg(0xF0, src_reg));
+
+ if (BPF_OP(insn->code) == BPF_MOD &&
+ dst_reg != BPF_REG_3)
+ /* mov dst_reg, rdx */
+ emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
+ else if (BPF_OP(insn->code) == BPF_DIV &&
+ dst_reg != BPF_REG_0)
+ /* mov dst_reg, rax */
+ emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
+
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x58); /* pop rax */
break;
+ }
case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_K:
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -103,6 +103,53 @@
.retval = 2,
},
{
+ "jit: various div tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
"jit: jsgt, jslt",
.insns = {
BPF_LD_IMM64(BPF_REG_1, 0x80000000ULL),