Blob Blame History Raw
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:36 -0700
Subject: nfp: bpf: add support for atomic add of unknown values
Patch-mainline: v4.17-rc1
Git-commit: 41aed09cf61c00ef6c3b2648d5a193cbaf2a74d0
References: bsc#1109837

Allow atomic add to be used even when the value is not guaranteed
to fit into a 16 bit immediate.  This requires the value to be pulled
as data, and therefore use of a transfer register and a context swap.

Track the information about possible lengths of the value; if it is
guaranteed to be larger than 16 bits, don't generate the code for the
optimized case at all.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c      |   78 +++++++++++++++++++---
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |    7 +
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |   14 +--
 drivers/net/ethernet/netronome/nfp/nfp_asm.c      |    1 
 drivers/net/ethernet/netronome/nfp/nfp_asm.h      |    3 
 5 files changed, 88 insertions(+), 15 deletions(-)

--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2127,12 +2127,49 @@ static int mem_stx8(struct nfp_prog *nfp
 static int
 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 {
-	swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
 	u8 dst_gpr = meta->insn.dst_reg * 2;
 	u8 src_gpr = meta->insn.src_reg * 2;
+	unsigned int full_add, out;
+	swreg addra, addrb, off;
 
 	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
+	/* We can fit 16 bits into the command immediate. If we know the value
+	 * is guaranteed to either always or never fit into 16 bits, we only
+	 * generate code to handle that particular case; otherwise we generate
+	 * code for both.
+	 */
+	out = nfp_prog_current_offset(nfp_prog);
+	full_add = nfp_prog_current_offset(nfp_prog);
+
+	if (meta->insn.off) {
+		out += 2;
+		full_add += 2;
+	}
+	if (meta->xadd_maybe_16bit) {
+		out += 3;
+		full_add += 3;
+	}
+	if (meta->xadd_over_16bit)
+		out += 2 + is64;
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		out += 5;
+		full_add += 5;
+	}
+
+	/* Generate the branch for choosing add_imm vs add */
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		swreg max_imm = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, max_imm, 0xffff);
+		emit_alu(nfp_prog, reg_none(),
+			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
+		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
+		/* defer for add */
+	}
+
 	/* If insn has an offset add to the address */
 	if (!meta->insn.off) {
 		addra = reg_a(dst_gpr);
@@ -2146,13 +2183,38 @@ mem_xadd(struct nfp_prog *nfp_prog, stru
 		addrb = imma_b(nfp_prog);
 	}
 
-	wrp_immed(nfp_prog, prev_alu,
-		  FIELD_PREP(CMD_OVE_DATA, 2) |
-		  CMD_OVE_LEN |
-		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
-	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
-	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
-		       addra, addrb, 0, CMD_CTX_NO_SWAP);
+	/* Generate the add_imm if 16 bits are possible */
+	if (meta->xadd_maybe_16bit) {
+		swreg prev_alu = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, prev_alu,
+			  FIELD_PREP(CMD_OVE_DATA, 2) |
+			  CMD_OVE_LEN |
+			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+			       addra, addrb, 0, CMD_CTX_NO_SWAP);
+
+		if (meta->xadd_over_16bit)
+			emit_br(nfp_prog, BR_UNC, out, 0);
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
+		return -EINVAL;
+
+	/* Generate the add if 16 bits are not guaranteed */
+	if (meta->xadd_over_16bit) {
+		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
+			 addra, addrb, is64 << 2,
+			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
+
+		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
+		if (is64)
+			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
+		return -EINVAL;
 
 	return 0;
 }
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -229,6 +229,8 @@ struct nfp_bpf_reg_state {
  * @pkt_cache.range_start: start offset for associated packet data cache
  * @pkt_cache.range_end: end offset for associated packet data cache
  * @pkt_cache.do_init: this read needs to initialize packet data cache
+ * @xadd_over_16bit: 16bit immediate is not guaranteed
+ * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
@@ -243,6 +245,7 @@ struct nfp_bpf_reg_state {
 struct nfp_insn_meta {
 	struct bpf_insn insn;
 	union {
+		/* pointer ops (ld/st/xadd) */
 		struct {
 			struct bpf_reg_state ptr;
 			struct bpf_insn *paired_st;
@@ -253,8 +256,12 @@ struct nfp_insn_meta {
 				s16 range_end;
 				bool do_init;
 			} pkt_cache;
+			bool xadd_over_16bit;
+			bool xadd_maybe_16bit;
 		};
+		/* jump */
 		struct nfp_insn_meta *jmp_dst;
+		/* function calls */
 		struct {
 			u32 func_id;
 			struct bpf_reg_state arg1;
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -414,16 +414,16 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_
 			dreg->type);
 		return -EOPNOTSUPP;
 	}
-	if (sreg->type != SCALAR_VALUE ||
-	    sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) {
-		char tn_buf[48];
-
-		tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off);
-		pr_vlog(env, "atomic add not of a small constant scalar: %s\n",
-			tn_buf);
+	if (sreg->type != SCALAR_VALUE) {
+		pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
 		return -EOPNOTSUPP;
 	}
 
+	meta->xadd_over_16bit |=
+		sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
+	meta->xadd_maybe_16bit |=
+		(sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
+
 	return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
 }
 
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__C
 	[CMD_TGT_READ32_SWAP] =		{ 0x02, 0x5c },
 	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
 	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+	[CMD_TGT_ADD] =			{ 0x00, 0x47 },
 	[CMD_TGT_ADD_IMM] =		{ 0x02, 0x47 },
 };
 
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -238,6 +238,7 @@ enum cmd_tgt_map {
 	CMD_TGT_READ32_SWAP,
 	CMD_TGT_READ_LE,
 	CMD_TGT_READ_SWAP_LE,
+	CMD_TGT_ADD,
 	CMD_TGT_ADD_IMM,
 	__CMD_TGT_MAP_SIZE,
 };
@@ -252,6 +253,8 @@ enum cmd_mode {
 
 enum cmd_ctx_swap {
 	CMD_CTX_SWAP = 0,
+	CMD_CTX_SWAP_DEFER1 = 1,
+	CMD_CTX_SWAP_DEFER2 = 2,
 	CMD_CTX_NO_SWAP = 3,
 };