summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2023-02-17 12:55:14 -0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-10-10 22:00:42 +0200
commit8904d8848b31038b8ede40a0247a6682b0133f7a (patch)
treeab4ad9c5d1ce6e39ba0b163bfded316e1750af06
parentf82aac8162871e87027692b36af335a2375d4580 (diff)
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
[ Upstream commit 31de4105f00d64570139bc5494a201b0bd57349f ] The bpf_fib_lookup() also looks up the neigh table. This was done before bpf_redirect_neigh() was added. In the use case that does not manage the neigh table and requires bpf_fib_lookup() to lookup a fib to decide if it needs to redirect or not, the bpf prog can depend only on using bpf_redirect_neigh() to lookup the neigh. It also keeps the neigh entries fresh and connected. This patch adds a bpf_fib_lookup flag, SKIP_NEIGH, to avoid the double neigh lookup when the bpf prog always call bpf_redirect_neigh() to do the neigh lookup. The params->smac output is skipped together when SKIP_NEIGH is set because bpf_redirect_neigh() will figure out the smac also. Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20230217205515.3583372-1-martin.lau@linux.dev Stable-dep-of: 5baa0433a15e ("neighbour: fix data-races around n->output") Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--net/core/filter.c39
-rw-r--r--tools/include/uapi/linux/bpf.h6
3 files changed, 38 insertions, 13 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 53bc48794719..92dbe89dafbf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3112,6 +3112,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
+ * **BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * Skip the neighbour table lookup. *params*->dmac
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args {
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};
enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 9fd7c88b5db4..6ef62d84dcac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5674,12 +5674,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
#endif
#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
-static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
- const struct neighbour *neigh,
- const struct net_device *dev, u32 mtu)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
{
- memcpy(params->dmac, neigh->ha, ETH_ALEN);
- memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
if (mtu)
@@ -5790,21 +5786,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (likely(nhc->nhc_gw_family != AF_INET6)) {
if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4;
-
- neigh = __ipv4_neigh_lookup_noref(dev,
- (__force u32)params->ipv4_dst);
} else {
struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6;
- neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
}
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
+ if (likely(nhc->nhc_gw_family != AF_INET6))
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ else
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
+
if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);
- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif
@@ -5912,24 +5916,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
*/
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);
- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif
+#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+ BPF_FIB_LOOKUP_SKIP_NEIGH)
+
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;
switch (params->family) {
@@ -5967,7 +5980,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;
if (params->tot_len)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 53bc48794719..92dbe89dafbf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3112,6 +3112,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
+ * **BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * Skip the neighbour table lookup. *params*->dmac
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args {
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};
enum {