Blob Blame History Raw
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 16 Dec 2021 02:04:26 +0000
Subject: bpf: Move getsockopt retval to struct bpf_cg_run_ctx
Patch-mainline: v5.18-rc1
Git-commit: c4dcfdd406aa2167396ac215e351e5e4dfd7efe3
References: jsc#PED-1377

The retval value is moved to struct bpf_cg_run_ctx for ease of access
in different prog types with different context structs layouts. The
helper implementation (to be added in a later patch in the series) can
simply perform a container_of from current->bpf_ctx to retrieve
bpf_cg_run_ctx.

Unfortunately, there is no easy way to access the current task_struct
via the verifier BPF bytecode rewrite, aside from possibly calling a
helper, so a pointer to current task is added to struct bpf_sockopt_kern
so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with
an indirection.

For backward compatibility, if a getsockopt program rejects a syscall
by returning 0, an -EPERM will be generated, by having the
BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to
-EPERM. Unlike prior to this patch, this -EPERM will be visible to
ctx->retval for any other hooks down the line in the prog array.

Additionally, the restriction that getsockopt filters can only set
the retval to 0 is removed, considering that certain getsockopt
implementations may return optlen. Filters are now able to set the
value arbitrarily.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
 include/linux/bpf.h    |   20 ++++++-----
 include/linux/filter.h |    5 ++
 kernel/bpf/cgroup.c    |   82 ++++++++++++++++++++++++++++---------------------
 3 files changed, 63 insertions(+), 44 deletions(-)

--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1250,6 +1250,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
 	struct bpf_run_ctx run_ctx;
 	const struct bpf_prog_array_item *prog_item;
+	int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1285,16 +1286,16 @@ typedef u32 (*bpf_prog_run_fn)(const str
 static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
 			    const void *ctx, bpf_prog_run_fn run_prog,
-			    u32 *ret_flags)
+			    int retval, u32 *ret_flags)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
 	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_cg_run_ctx run_ctx;
-	int ret = 0;
 	u32 func_ret;
 
+	run_ctx.retval = retval;
 	migrate_disable();
 	rcu_read_lock();
 	array = rcu_dereference(array_rcu);
@@ -1304,27 +1305,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct
 		run_ctx.prog_item = item;
 		func_ret = run_prog(prog, ctx);
 		if (!(func_ret & 1))
-			ret = -EPERM;
+			run_ctx.retval = -EPERM;
 		*(ret_flags) |= (func_ret >> 1);
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
 	rcu_read_unlock();
 	migrate_enable();
-	return ret;
+	return run_ctx.retval;
 }
 
 static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-		      const void *ctx, bpf_prog_run_fn run_prog)
+		      const void *ctx, bpf_prog_run_fn run_prog,
+		      int retval)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
 	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_cg_run_ctx run_ctx;
-	int ret = 0;
 
+	run_ctx.retval = retval;
 	migrate_disable();
 	rcu_read_lock();
 	array = rcu_dereference(array_rcu);
@@ -1333,13 +1335,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_p
 	while ((prog = READ_ONCE(item->prog))) {
 		run_ctx.prog_item = item;
 		if (!run_prog(prog, ctx))
-			ret = -EPERM;
+			run_ctx.retval = -EPERM;
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
 	rcu_read_unlock();
 	migrate_enable();
-	return ret;
+	return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1399,7 +1401,7 @@ out:
 		u32 _flags = 0;				\
 		bool _cn;				\
 		u32 _ret;				\
-		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
 		_cn = _flags & BPF_RET_SET_CN;		\
 		if (!_ret)				\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
 	s32		level;
 	s32		optname;
 	s32		optlen;
-	s32		retval;
+	/* for retval in struct bpf_cg_run_ctx */
+	struct task_struct *current_task;
+	/* Temporary "register" for indirect stores to ppos. */
+	u64		tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1043,7 +1043,7 @@ int __cgroup_bpf_run_filter_skb(struct s
 			cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
 	} else {
 		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-					    __bpf_prog_run_save_cb);
+					    __bpf_prog_run_save_cb, 0);
 	}
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
@@ -1072,7 +1072,7 @@ int __cgroup_bpf_run_filter_sk(struct so
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
 	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
-				     bpf_prog_run);
+				     bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1118,7 +1118,7 @@ int __cgroup_bpf_run_filter_sock_addr(st
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-					   bpf_prog_run, flags);
+					   bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1145,7 +1145,7 @@ int __cgroup_bpf_run_filter_sock_ops(str
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
 	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-				     bpf_prog_run);
+				     bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1163,7 +1163,7 @@ int __cgroup_bpf_check_dev_permission(sh
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
 	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-				    bpf_prog_run);
+				    bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	return ret;
@@ -1294,7 +1294,8 @@ int __cgroup_bpf_run_filter_sysctl(struc
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+				    bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1409,7 +1410,7 @@ int __cgroup_bpf_run_filter_setsockopt(s
 
 	lock_sock(sk);
 	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-				    &ctx, bpf_prog_run);
+				    &ctx, bpf_prog_run, 0);
 	release_sock(sk);
 
 	if (ret)
@@ -1473,7 +1474,7 @@ int __cgroup_bpf_run_filter_getsockopt(s
 		.sk = sk,
 		.level = level,
 		.optname = optname,
-		.retval = retval,
+		.current_task = current,
 	};
 	int ret;
 
@@ -1517,10 +1518,10 @@ int __cgroup_bpf_run_filter_getsockopt(s
 
 	lock_sock(sk);
 	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-				    &ctx, bpf_prog_run);
+				    &ctx, bpf_prog_run, retval);
 	release_sock(sk);
 
-	if (ret)
+	if (ret < 0)
 		goto out;
 
 	if (ctx.optlen > max_optlen || ctx.optlen < 0) {
@@ -1528,14 +1529,6 @@ int __cgroup_bpf_run_filter_getsockopt(s
 		goto out;
 	}
 
-	/* BPF programs only allowed to set retval to 0, not some
-	 * arbitrary value.
-	 */
-	if (ctx.retval != 0 && ctx.retval != retval) {
-		ret = -EFAULT;
-		goto out;
-	}
-
 	if (ctx.optlen != 0) {
 		if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
 		    put_user(ctx.optlen, optlen)) {
@@ -1544,8 +1537,6 @@ int __cgroup_bpf_run_filter_getsockopt(s
 		}
 	}
 
-	ret = ctx.retval;
-
 out:
 	sockopt_free_buf(&ctx, &buf);
 	return ret;
@@ -1560,10 +1551,10 @@ int __cgroup_bpf_run_filter_getsockopt_k
 		.sk = sk,
 		.level = level,
 		.optname = optname,
-		.retval = retval,
 		.optlen = *optlen,
 		.optval = optval,
 		.optval_end = optval + *optlen,
+		.current_task = current,
 	};
 	int ret;
 
@@ -1576,25 +1567,19 @@ int __cgroup_bpf_run_filter_getsockopt_k
 	 */
 
 	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-				    &ctx, bpf_prog_run);
-	if (ret)
+				    &ctx, bpf_prog_run, retval);
+	if (ret < 0)
 		return ret;
 
 	if (ctx.optlen > *optlen)
 		return -EFAULT;
 
-	/* BPF programs only allowed to set retval to 0, not some
-	 * arbitrary value.
-	 */
-	if (ctx.retval != 0 && ctx.retval != retval)
-		return -EFAULT;
-
 	/* BPF programs can shrink the buffer, export the modifications.
 	 */
 	if (ctx.optlen != 0)
 		*optlen = ctx.optlen;
 
-	return ctx.retval;
+	return ret;
 }
 #endif
 
@@ -2010,10 +1995,39 @@ static u32 cg_sockopt_convert_ctx_access
 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
 		break;
 	case offsetof(struct bpf_sockopt, retval):
-		if (type == BPF_WRITE)
-			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-		else
-			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+		if (type == BPF_WRITE) {
+			int treg = BPF_REG_9;
+
+			if (si->src_reg == treg || si->dst_reg == treg)
+				--treg;
+			if (si->src_reg == treg || si->dst_reg == treg)
+				--treg;
+			*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+					      offsetof(struct bpf_sockopt_kern, tmp_reg));
+			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+					      treg, si->dst_reg,
+					      offsetof(struct bpf_sockopt_kern, current_task));
+			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+					      treg, treg,
+					      offsetof(struct task_struct, bpf_ctx));
+			*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+					      treg, si->src_reg,
+					      offsetof(struct bpf_cg_run_ctx, retval));
+			*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+					      offsetof(struct bpf_sockopt_kern, tmp_reg));
+		} else {
+			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+					      si->dst_reg, si->src_reg,
+					      offsetof(struct bpf_sockopt_kern, current_task));
+			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+					      si->dst_reg, si->dst_reg,
+					      offsetof(struct task_struct, bpf_ctx));
+			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+					      si->dst_reg, si->dst_reg,
+					      offsetof(struct bpf_cg_run_ctx, retval));
+		}
 		break;
 	case offsetof(struct bpf_sockopt, optval):
 		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);