From: Jie Meng <jmeng@fb.com>
Date: Mon, 13 Sep 2021 14:13:37 -0700
Subject: bpf,x64: Emit IMUL instead of MUL for x86-64
Patch-mainline: v5.16-rc1
Git-commit: c0354077439bc9adcc9f2c96d6bbaf8b13748317
References: jsc#PED-1377

IMUL allows for multiple operands, so saving and restoring rax/rdx is no
longer needed. Signedness of the operands doesn't matter here because we
only keep the lower 32/64 bits of the product for 32/64-bit
multiplications.
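
Not part of the original changelog: a minimal user-space sketch of that
argument, assuming two's-complement arithmetic as on x86-64. The low 32
bits of a product are the same whether the operands are treated as
signed or unsigned, so the unsigned MUL can be replaced by IMUL:

  #include <inttypes.h>
  #include <stdio.h>

  int main(void)
  {
  	int32_t  sa = -2, sb = 3;                      /* signed view of the operands    */
  	uint32_t ua = (uint32_t)sa, ub = (uint32_t)sb; /* unsigned view of the same bits */

  	uint64_t uprod = (uint64_t)ua * ub;  /* full unsigned product */
  	int64_t  sprod = (int64_t)sa * sb;   /* full signed product   */

  	/* A 32-bit BPF_MUL keeps only the low 32 bits; they agree. */
  	printf("unsigned low32: 0x%08" PRIx32 "\n", (uint32_t)uprod);
  	printf("signed   low32: 0x%08" PRIx32 "\n", (uint32_t)sprod);
  	return 0;
  }

The same holds for 64-bit multiplication and the low 64 bits of the
128-bit product, which is why IMUL can stand in for MUL wholesale.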

Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210913211337.1564014-1-jmeng@fb.com
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
 arch/x86/net/bpf_jit_comp.c                |   53 ++++++++++++-----------------
 tools/testing/selftests/bpf/verifier/jit.c |   22 ++++++++++--
 2 files changed, 42 insertions(+), 33 deletions(-)

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1044,41 +1044,34 @@ static int do_jit(struct bpf_prog *bpf_p
 			break;
 
 		case BPF_ALU | BPF_MUL | BPF_K:
-		case BPF_ALU | BPF_MUL | BPF_X:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
-		case BPF_ALU64 | BPF_MUL | BPF_X:
-		{
-			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_2mod(0x40, dst_reg, dst_reg));
 
-			if (dst_reg != BPF_REG_0)
-				EMIT1(0x50); /* push rax */
-			if (dst_reg != BPF_REG_3)
-				EMIT1(0x52); /* push rdx */
-
-			/* mov r11, dst_reg */
-			EMIT_mov(AUX_REG, dst_reg);
-
-			if (BPF_SRC(insn->code) == BPF_X)
-				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
+			if (is_imm8(imm32))
+				/* imul dst_reg, dst_reg, imm8 */
+				EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg),
+				      imm32);
 			else
-				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
-
-			if (is64)
-				EMIT1(add_1mod(0x48, AUX_REG));
-			else if (is_ereg(AUX_REG))
-				EMIT1(add_1mod(0x40, AUX_REG));
-			/* mul(q) r11 */
-			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
-
-			if (dst_reg != BPF_REG_3)
-				EMIT1(0x5A); /* pop rdx */
-			if (dst_reg != BPF_REG_0) {
-				/* mov dst_reg, rax */
-				EMIT_mov(dst_reg, BPF_REG_0);
-				EMIT1(0x58); /* pop rax */
-			}
+				/* imul dst_reg, dst_reg, imm32 */
+				EMIT2_off32(0x69,
+					    add_2reg(0xC0, dst_reg, dst_reg),
+					    imm32);
 			break;
-		}
+
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU64 | BPF_MUL | BPF_X:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, src_reg, dst_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, src_reg, dst_reg));
+
+			/* imul dst_reg, src_reg */
+			EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg));
+			break;
+
 			/* Shifts */
 		case BPF_ALU | BPF_LSH | BPF_K:
 		case BPF_ALU | BPF_RSH | BPF_K:
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -62,6 +62,11 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
 	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
 	BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
@@ -73,11 +78,22 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+	BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
 	BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
-	BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
-	BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
-	BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+	BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL),
+	BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_IMM(BPF_REG_0, 2),