From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 5 Jul 2018 08:50:15 -0700
Subject: bpf: sockmap, convert bpf_compute_data_pointers to bpf_*_sk_skb
Patch-mainline: v4.18-rc6
Git-commit: 0ea488ff8d23c93da383fcf424825c298b13b1fb
References: bsc#1109837

In commit

  'bpf: bpf_compute_data uses incorrect cb structure' (8108a7751512)

we added the routine bpf_compute_data_end_sk_skb() to compute the
correct data_end values, but this has since been lost. In kernel
v4.14 this was correct and the above patch was applied in its
entirety. Then, when v4.14 was merged into the v4.15-rc1 net-next
tree, we lost the piece that renamed bpf_compute_data_pointers to
the new function bpf_compute_data_end_sk_skb. This happened in the
merge commit,

e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")

when it conflicted with the following rename patch,

6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")

Finally, after a refactor, I thought even the function
bpf_compute_data_end_sk_skb() was no longer needed, and it was
erroneously removed.

However, we never reverted the sk_skb_convert_ctx_access() usage of
tcp_skb_cb, which had been committed and survived the merge conflict.
Here we fix this by adding back the helper and the *_data_end_sk_skb()
usage. Using the bpf_skb_data_end mapping is not correct, because it
expects a qdisc_skb_cb object, but at the sock layer this is not the
case. Even though it happens to work here, because we don't overwrite
any in-use data at the socket layer and the cb structure is cleared
later, this has the potential to create subtle issues. Even more
concretely, the filter.c access check uses tcp_skb_cb.

By some act of chance, though, the two layouts happen to line up:

struct bpf_skb_data_end {
        struct qdisc_skb_cb        qdisc_cb;             /*     0    28 */

        /* XXX 4 bytes hole, try to pack */

        void *                     data_meta;            /*    32     8 */
        void *                     data_end;             /*    40     8 */

        /* size: 48, cachelines: 1, members: 3 */
        /* sum members: 44, holes: 1, sum holes: 4 */
        /* last cacheline: 48 bytes */
};

and then tcp_skb_cb,

struct tcp_skb_cb {
	[...]
                struct {
                        __u32      flags;                /*    24     4 */
                        struct sock * sk_redir;          /*    32     8 */
                        void *     data_end;             /*    40     8 */
                } bpf;                                   /*          24 */
        };

So when we use offsetof() to track down the byte offset, we get 40 in
either case, and everything continues to work. Fix this mess and use
the correct structures; it's unclear how long the coincidence would
have held once someone moved the structs around.
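
As an aside, the coincidence is easy to reproduce in userspace. The
sketch below is illustrative only; the fake_* structs are simplified
stand-ins (assuming 64-bit pointers and the padding shown by the
pahole output above), not the real kernel definitions, and it is not
part of the patch:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for struct bpf_skb_data_end: a 28-byte qdisc_skb_cb
 * prefix, a 4-byte alignment hole, then the two pointers.
 */
struct fake_bpf_skb_data_end {
        unsigned char qdisc_cb[28];     /* bytes 0..27 */
        /* 4-byte hole */
        void *data_meta;                /* offset 32 */
        void *data_end;                 /* offset 40 */
};

/* Stand-in for struct tcp_skb_cb: 24 bytes of earlier members,
 * then the bpf struct with flags, a hole, and the pointers.
 */
struct fake_tcp_skb_cb {
        unsigned char pad[24];          /* bytes 0..23 */
        struct {
                unsigned int flags;     /* offset 24 */
                /* 4-byte hole */
                void *sk_redir;         /* offset 32 */
                void *data_end;         /* offset 40 */
        } bpf;
};

int main(void)
{
        /* Both data_end members land at byte 40, which is the only
         * reason the wrong cb mapping appeared to work.
         */
        printf("bpf_skb_data_end.data_end at %zu\n",
               offsetof(struct fake_bpf_skb_data_end, data_end));
        printf("tcp_skb_cb.bpf.data_end   at %zu\n",
               offsetof(struct fake_tcp_skb_cb, bpf.data_end));
        assert(offsetof(struct fake_bpf_skb_data_end, data_end) ==
               offsetof(struct fake_tcp_skb_cb, bpf.data_end));
        return 0;
}

Reordering any field in either stand-in shifts the offsets and trips
the assert, which is exactly the fragility being removed here.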

Reported-by: Martin KaFai Lau <kafai@fb.com>
Fixes: e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
Fixes: 6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 include/net/tcp.h |    4 ++
 net/core/filter.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 95 insertions(+), 7 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -844,6 +844,10 @@ struct tcp_skb_cb {
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
 
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1754,6 +1754,37 @@ static const struct bpf_func_proto bpf_s
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+					   unsigned int write_len)
+{
+	int err = __bpf_try_make_writable(skb, write_len);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+	.func		= sk_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
 	   u64, from, u64, to, u64, flags)
 {
@@ -2844,8 +2875,8 @@ static int bpf_skb_trim_rcsum(struct sk_
 	return __skb_trim_rcsum(skb, new_len);
 }
 
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
-	   u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+					u64 flags)
 {
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 min_len = __bpf_skb_min_len(skb);
@@ -2881,6 +2912,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct s
 		if (!ret && skb_is_gso(skb))
 			skb_gso_reset(skb);
 	}
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
 
 	bpf_compute_data_pointers(skb);
 	return ret;
@@ -2895,9 +2933,27 @@ static const struct bpf_func_proto bpf_s
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 	   u64, flags)
 {
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+	.func		= sk_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+					u64 flags)
+{
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 new_len = skb->len + head_room;
 	int ret;
@@ -2922,8 +2978,16 @@ BPF_CALL_3(bpf_skb_change_head, struct s
 		skb_reset_mac_header(skb);
 	}
 
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
 	bpf_compute_data_pointers(skb);
-	return 0;
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2935,6 +2999,23 @@ static const struct bpf_func_proto bpf_s
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+	.func		= sk_skb_change_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
 	return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3337,9 +3418,12 @@ bool bpf_helper_changes_pkt_data(void *f
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
+	    func == sk_skb_change_head ||
 	    func == bpf_skb_change_tail ||
+	    func == sk_skb_change_tail ||
 	    func == bpf_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
+	    func == sk_skb_pull_data ||
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
@@ -4322,11 +4406,11 @@ sk_skb_func_proto(enum bpf_func_id func_
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
 	case BPF_FUNC_skb_pull_data:
-		return &bpf_skb_pull_data_proto;
+		return &sk_skb_pull_data_proto;
 	case BPF_FUNC_skb_change_tail:
-		return &bpf_skb_change_tail_proto;
+		return &sk_skb_change_tail_proto;
 	case BPF_FUNC_skb_change_head:
-		return &bpf_skb_change_head_proto;
+		return &sk_skb_change_head_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid: