bpf: fix bpf helpers to use skb->mac_header relative offsets

For the short-term solution, let's fix the bpf helper functions to use skb->mac_header relative offsets instead of skb->data, so that the same eBPF programs work with cls_bpf and act_bpf on both the ingress and egress qdisc paths. We need to ensure that mac_header is set before calling into programs. This is effectively the first option from the discussion referenced below. A more long-term solution for the LD_ABS|LD_IND instructions will be more intrusive but also more beneficial than this, and will be implemented later, as it's too risky at this point in time. I.e., we plan to look into the option of moving skb_pull() out of eth_type_trans() and into netif_receive_skb(), as has been suggested as the second option. Meanwhile, this solution ensures ingress can be used with eBPF, too, and that we won't run into ABI troubles later. For dealing with negative offsets inside eBPF helper functions, we've implemented bpf_skb_clone_unwritable() to test for unwritable headers. Reference: http://thread.gmane.org/gmane.linux.network/359129/focus=359694 Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers") Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Alexei Starovoitov <ast@plumgrid.com> 2015-04-15 12:55:45 -0700
committer: David S. Miller <davem@davemloft.net> 2015-04-16 14:08:49 -0400
commit: a166151cbe33b53221c24259e4a7201064b3ba79 (patch)
tree: bbc94226f42d4baa1b6ee5203d58d55fa9a69919 /net
parent: 51b5df886874816ff986fe66fe0d7b7eca9f6cd1 (diff)
3 files changed, 38 insertions, 9 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index b669e75d2b36..bf831a85c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
+/**
+ *	bpf_skb_clone_unwritable - is the header of a clone not writable
+ *	@skb: buffer to check
+ *	@len: length up to which to write, can be negative
+ *
+ *	Returns true if modifying the header part of the cloned buffer
+ *	requires the data to be copied first. This version works with
+ *	the negative lengths needed for the eBPF case.
+ */
+static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
+{
+	return skb_header_cloned(skb) ||
+	       (int) skb_headroom(skb) + len > skb->hdr_len;
+}
+
 #define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	unsigned int offset = (unsigned int) r2;
+	int offset = (int) r2;
 	void *from = (void *) (long) r3;
 	unsigned int len = (unsigned int) r4;
 	char buf[16];
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 	 *
 	 * so check for invalid 'offset' and too large 'len'
 	 */
-	if (unlikely(offset > 0xffff || len > sizeof(buf)))
+	if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + len)))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 #define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
 #define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
 
-static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 4d2cede17468..dc6a2d324bd8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return TC_ACT_UNSPEC;
+
 	spin_lock(&prog->tcf_lock);
 
 	prog->tcf_tm.lastuse = jiffies;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5c4171c5d2bd..91bd9c19471d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_bpf_prog *prog;
 	int ret = -1;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return -1;
+
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
author	Alexei Starovoitov <ast@plumgrid.com>	2015-04-15 12:55:45 -0700
committer	David S. Miller <davem@davemloft.net>	2015-04-16 14:08:49 -0400
commit	a166151cbe33b53221c24259e4a7201064b3ba79 (patch)
tree	bbc94226f42d4baa1b6ee5203d58d55fa9a69919 /net
parent	51b5df886874816ff986fe66fe0d7b7eca9f6cd1 (diff)