summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig26
-rw-r--r--net/ipv6/Makefile7
-rw-r--r--net/ipv6/addrconf.c289
-rw-r--r--net/ipv6/addrlabel.c18
-rw-r--r--net/ipv6/af_inet6.c248
-rw-r--r--net/ipv6/ah6.c24
-rw-r--r--net/ipv6/anycast.c27
-rw-r--r--net/ipv6/datagram.c43
-rw-r--r--net/ipv6/esp6.c5
-rw-r--r--net/ipv6/exthdrs.c73
-rw-r--r--net/ipv6/exthdrs_core.c168
-rw-r--r--net/ipv6/exthdrs_offload.c41
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/icmp.c14
-rw-r--r--net/ipv6/inet6_connection_sock.c27
-rw-r--r--net/ipv6/inet6_hashtables.c46
-rw-r--r--net/ipv6/ip6_checksum.c97
-rw-r--r--net/ipv6/ip6_fib.c69
-rw-r--r--net/ipv6/ip6_flowlabel.c174
-rw-r--r--net/ipv6/ip6_gre.c48
-rw-r--r--net/ipv6/ip6_input.c44
-rw-r--r--net/ipv6/ip6_offload.c283
-rw-r--r--net/ipv6/ip6_offload.h18
-rw-r--r--net/ipv6/ip6_output.c153
-rw-r--r--net/ipv6/ip6_tunnel.c292
-rw-r--r--net/ipv6/ip6mr.c296
-rw-r--r--net/ipv6/ipv6_sockglue.c16
-rw-r--r--net/ipv6/mcast.c98
-rw-r--r--net/ipv6/ndisc.c422
-rw-r--r--net/ipv6/netfilter/ip6_tables.c127
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c47
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c3
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c5
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c15
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c168
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c24
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c6
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/output_core.c76
-rw-r--r--net/ipv6/proc.c16
-rw-r--r--net/ipv6/protocol.c25
-rw-r--r--net/ipv6/raw.c19
-rw-r--r--net/ipv6/reassembly.c32
-rw-r--r--net/ipv6/route.c394
-rw-r--r--net/ipv6/sit.c512
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/tcp_ipv6.c167
-rw-r--r--net/ipv6/tcpv6_offload.c95
-rw-r--r--net/ipv6/udp.c164
-rw-r--r--net/ipv6/udp_offload.c121
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c4
-rw-r--r--net/ipv6/xfrm6_policy.c74
-rw-r--r--net/ipv6/xfrm6_state.c4
-rw-r--r--net/ipv6/xfrm6_tunnel.c16
56 files changed, 3287 insertions, 1907 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f7fe7270e37..ed0b9e2e797a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -11,7 +11,7 @@ menuconfig IPV6
You will still be able to do traditional IPv4 networking as well.
For general information about IPv6, see
- <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
+ <https://en.wikipedia.org/wiki/IPv6>.
For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
For specific information about IPv6 under Linux, read the HOWTO at
<http://www.bieringer.de/linux/IPv6/>.
@@ -50,16 +50,15 @@ config IPV6_ROUTER_PREF
If unsure, say N.
config IPV6_ROUTE_INFO
- bool "IPv6: Route Information (RFC 4191) support (EXPERIMENTAL)"
- depends on IPV6_ROUTER_PREF && EXPERIMENTAL
+ bool "IPv6: Route Information (RFC 4191) support"
+ depends on IPV6_ROUTER_PREF
---help---
This is experimental support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
- bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "IPv6: Enable RFC 4429 Optimistic DAD"
---help---
This is experimental support for optimistic Duplicate
Address Detection. It allows for autoconfigured addresses
@@ -105,8 +104,7 @@ config INET6_IPCOMP
If unsure, say Y.
config IPV6_MIP6
- tristate "IPv6: Mobility (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ tristate "IPv6: Mobility"
select XFRM
---help---
Support for IPv6 Mobility described in RFC 3775.
@@ -150,8 +148,7 @@ config INET6_XFRM_MODE_BEET
If unsure, say Y.
config INET6_XFRM_MODE_ROUTEOPTIMIZATION
- tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ tristate "IPv6: MIPv6 route optimization mode"
select XFRM
---help---
Support for MIPv6 route optimization mode.
@@ -171,8 +168,8 @@ config IPV6_SIT
Saying M here will produce a module called sit. If unsure, say Y.
config IPV6_SIT_6RD
- bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)"
- depends on IPV6_SIT && EXPERIMENTAL
+ bool "IPv6: IPv6 Rapid Deployment (6RD)"
+ depends on IPV6_SIT
default n
---help---
IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
@@ -219,7 +216,6 @@ config IPV6_GRE
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
- depends on EXPERIMENTAL
select FIB_RULES
---help---
Support multiple routing tables.
@@ -239,8 +235,8 @@ config IPV6_SUBTREES
If unsure, say N.
config IPV6_MROUTE
- bool "IPv6: multicast routing (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
+ bool "IPv6: multicast routing"
+ depends on IPV6
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
@@ -260,7 +256,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES
If unsure, say N.
config IPV6_PIMSM_V2
- bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
+ bool "IPv6: PIM-SM version 2 support"
depends on IPV6_MROUTE
---help---
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index b6d3f79151e2..309af19a0a0a 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,9 +7,11 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
@@ -38,6 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
-obj-y += addrconf_core.o exthdrs_core.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0424e4e27414..f2c7e615f902 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#include <net/pkt_sched.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
+#include <linux/netconf.h>
#ifdef CONFIG_IPV6_PRIVACY
#include <linux/random.h>
@@ -109,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp)
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
-#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
-#define ADDRCONF_TIMER_FUZZ (HZ / 4)
-#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
-
#ifdef CONFIG_SYSCTL
static void addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -153,6 +150,11 @@ static void addrconf_type_change(struct net_device *dev,
unsigned long event);
static int addrconf_ifdown(struct net_device *dev, int how);
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+ int plen,
+ const struct net_device *dev,
+ u32 flags, u32 noflags);
+
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_timer(unsigned long data);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
@@ -242,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
/* Check if a valid qdisc is available */
static inline bool addrconf_qdisc_ok(const struct net_device *dev)
@@ -249,12 +254,6 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
return !qdisc_tx_is_noop(dev);
}
-/* Check if a route is valid prefix route */
-static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
-}
-
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
{
if (del_timer(&ifp->timer))
@@ -401,7 +400,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
ndev->cnf.accept_dad = -1;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
pr_info("%s: Disabled Multicast RS\n", dev->name);
ndev->cnf.rtr_solicits = 0;
@@ -432,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
+ /* Join interface-local all-node multicast group */
+ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
/* Join all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
@@ -460,6 +462,149 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
return idev;
}
+/*
+ * Size of the netlink message needed to report the netconf attribute
+ * selected by @type; a @type of -1 selects every attribute.
+ */
+static int inet6_netconf_msgsize_devconf(int type)
+{
+	const int want_all = (type == -1);
+	int len;
+
+	/* Message header plus the always-present NETCONFA_IFINDEX. */
+	len = NLMSG_ALIGN(sizeof(struct netconfmsg));
+	len += nla_total_size(4);
+
+	if (want_all || type == NETCONFA_FORWARDING)
+		len += nla_total_size(4);
+#ifdef CONFIG_IPV6_MROUTE
+	if (want_all || type == NETCONFA_MC_FORWARDING)
+		len += nla_total_size(4);
+#endif
+
+	return len;
+}
+
+/*
+ * Write one netconf message describing @devconf into @skb.
+ *
+ * NETCONFA_IFINDEX is always emitted; @type picks which further attribute
+ * is added (-1 means all of them).
+ *
+ * Returns the value of nlmsg_end() on success, or -EMSGSIZE if the header
+ * or an attribute could not be added (the partial message is cancelled).
+ */
+static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+				      struct ipv6_devconf *devconf, u32 portid,
+				      u32 seq, int event, unsigned int flags,
+				      int type)
+{
+	const int want_all = (type == -1);
+	struct netconfmsg *ncm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ncm), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ncm = nlmsg_data(nlh);
+	ncm->ncm_family = AF_INET6;
+
+	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
+		goto nla_put_failure;
+
+	if (want_all || type == NETCONFA_FORWARDING) {
+		if (nla_put_s32(skb, NETCONFA_FORWARDING,
+				devconf->forwarding) < 0)
+			goto nla_put_failure;
+	}
+#ifdef CONFIG_IPV6_MROUTE
+	if (want_all || type == NETCONFA_MC_FORWARDING) {
+		if (nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+				devconf->mc_forwarding) < 0)
+			goto nla_put_failure;
+	}
+#endif
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/*
+ * Send an RTM_NEWNETCONF notification for @devconf to the
+ * RTNLGRP_IPV6_NETCONF multicast group.  If the message cannot be
+ * allocated or filled, the error is recorded on the group with
+ * rtnl_set_sk_err() instead.
+ */
+void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				  struct ipv6_devconf *devconf)
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+	if (!skb) {
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, -ENOBUFS);
+		return;
+	}
+
+	err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
+					 RTM_NEWNETCONF, 0, type);
+	if (err >= 0) {
+		rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL,
+			    GFP_ATOMIC);
+		return;
+	}
+
+	/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+	WARN_ON(err == -EMSGSIZE);
+	kfree_skb(skb);
+	rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
+}
+
+static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { /* policy handed to nlmsg_parse() for RTM_GETNETCONF requests */
+ [NETCONFA_IFINDEX] = { .len = sizeof(int) }, /* interface selector; read with nla_get_s32() by the request handler */
+ [NETCONFA_FORWARDING] = { .len = sizeof(int) }, /* forwarding attribute, int-sized length constraint */
+};
+
+/*
+ * RTM_GETNETCONF request handler: unicast the netconf settings of the
+ * selected IPv6 devconf back to the requester.
+ *
+ * The request must carry NETCONFA_IFINDEX, which selects the "all"
+ * devconf, the "default" devconf, or the devconf of one interface.
+ * @arg is unused.
+ *
+ * Returns the result of rtnl_unicast() on success, otherwise a negative
+ * errno: the nlmsg_parse() error, -EINVAL when NETCONFA_IFINDEX is
+ * missing or does not name a device with IPv6 state, -ENOBUFS when the
+ * reply cannot be allocated, or the inet6_netconf_fill_devconf() error.
+ */
+static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
+				     struct nlmsghdr *nlh,
+				     void *arg)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[NETCONFA_MAX+1];
+	struct netconfmsg *ncm;
+	struct sk_buff *skb;
+	struct ipv6_devconf *devconf;
+	struct inet6_dev *in6_dev;
+	struct net_device *dev;
+	int ifindex;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
+			  devconf_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	/*
+	 * Must be negative like every other error returned from here; a
+	 * positive EINVAL would not be seen as a failure by the caller.
+	 */
+	err = -EINVAL;
+	if (!tb[NETCONFA_IFINDEX])
+		goto errout;
+
+	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
+	switch (ifindex) {
+	case NETCONFA_IFINDEX_ALL:
+		devconf = net->ipv6.devconf_all;
+		break;
+	case NETCONFA_IFINDEX_DEFAULT:
+		devconf = net->ipv6.devconf_dflt;
+		break;
+	default:
+		/* Any other value is treated as a real interface index. */
+		dev = __dev_get_by_index(net, ifindex);
+		if (dev == NULL)
+			goto errout;
+		in6_dev = __in6_dev_get(dev);
+		if (in6_dev == NULL)
+			goto errout;
+		devconf = &in6_dev->cnf;
+		break;
+	}
+
+	err = -ENOBUFS;
+	/* Replies always carry every attribute, hence type -1 (ALL). */
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
+					 NETLINK_CB(in_skb).portid,
+					 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
+					 -1);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+	return err;
+}
+
#ifdef CONFIG_SYSCTL
static void dev_forward_change(struct inet6_dev *idev)
{
@@ -471,11 +616,16 @@ static void dev_forward_change(struct inet6_dev *idev)
dev = idev->dev;
if (idev->cnf.forwarding)
dev_disable_lro(dev);
- if (dev && (dev->flags & IFF_MULTICAST)) {
- if (idev->cnf.forwarding)
+ if (dev->flags & IFF_MULTICAST) {
+ if (idev->cnf.forwarding) {
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
- else
+ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+ ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+ } else {
ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+ ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+ ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+ }
}
list_for_each_entry(ifa, &idev->addr_list, if_list) {
@@ -486,6 +636,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
+ inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ dev->ifindex, &idev->cnf);
}
@@ -518,6 +670,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
*p = newf;
if (p == &net->ipv6.devconf_dflt->forwarding) {
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
rtnl_unlock();
return 0;
}
@@ -525,6 +681,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_all->forwarding) {
net->ipv6.devconf_dflt->forwarding = newf;
addrconf_forward_change(net, newf);
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
rtnl_unlock();
@@ -553,7 +713,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
pr_warn("Freeing alive inet6 address %p\n", ifp);
return;
}
- dst_release(&ifp->rt->dst);
+ ip6_rt_put(ifp->rt);
kfree_rcu(ifp, rcu);
}
@@ -787,17 +947,15 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
struct in6_addr prefix;
struct rt6_info *rt;
- struct net *net = dev_net(ifp->idev->dev);
- struct flowi6 fl6 = {};
ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
- fl6.flowi6_oif = ifp->idev->dev->ifindex;
- fl6.daddr = prefix;
- rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
- RT6_LOOKUP_F_IFACE);
- if (rt != net->ipv6.ip6_null_entry &&
- addrconf_is_prefix_route(rt)) {
+ rt = addrconf_get_prefix_route(&prefix,
+ ifp->prefix_len,
+ ifp->idev->dev,
+ 0, RTF_GATEWAY | RTF_DEFAULT);
+
+ if (rt) {
if (onlink == 0) {
ip6_del_rt(rt);
rt = NULL;
@@ -805,7 +963,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
rt6_set_expires(rt, expires);
}
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
/* clean up prefsrc entries */
@@ -900,7 +1058,7 @@ retry:
ipv6_add_addr(idev, &addr, tmp_plen,
ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
addr_flags) : NULL;
- if (!ift || IS_ERR(ift)) {
+ if (IS_ERR_OR_NULL(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1261,11 +1419,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1287,9 +1444,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
{
unsigned int hash = inet6_addr_hash(addr);
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
- hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1329,10 +1485,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
{
struct inet6_ifaddr *ifp, *result = NULL;
unsigned int hash = inet6_addr_hash(addr);
- struct hlist_node *node;
rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1509,6 +1664,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
if (dev->addr_len != IEEE802154_ADDR_LEN)
return -1;
memcpy(eui, dev->dev_addr, 8);
+ eui[0] ^= 2;
return 0;
}
@@ -1692,7 +1848,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
This thing is done here expecting that the whole
class of non-broadcast devices need not cloning.
*/
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
@@ -1723,7 +1879,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
- if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0))
+ if ((rt->rt6i_flags & noflags) != 0)
continue;
dst_hold(&rt->dst);
break;
@@ -1752,7 +1908,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ip6_route_add(&cfg);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void sit_route_add(struct net_device *dev)
{
struct fib6_config cfg = {
@@ -1881,8 +2037,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
dev, expires, flags);
}
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
/* Try to figure out our local address for this prefix */
@@ -1929,7 +2084,7 @@ ok:
addr_type&IPV6_ADDR_SCOPE_MASK,
addr_flags);
- if (!ifp || IS_ERR(ifp)) {
+ if (IS_ERR_OR_NULL(ifp)) {
in6_dev_put(in6_dev);
return;
}
@@ -2104,7 +2259,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (dev == NULL)
goto err_exit;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT) {
const struct net_device_ops *ops = dev->netdev_ops;
struct ifreq ifr;
@@ -2268,7 +2423,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2287,7 +2442,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2315,7 +2470,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void sit_add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
@@ -2434,7 +2589,7 @@ static void addrconf_dev_config(struct net_device *dev)
addrconf_add_linklocal(idev, &addr);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void addrconf_sit_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2471,7 +2626,7 @@ static void addrconf_sit_config(struct net_device *dev)
}
#endif
-#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2601,12 +2756,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
switch (dev->type) {
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
case ARPHRD_SIT:
addrconf_sit_config(dev);
break;
#endif
-#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
case ARPHRD_IPGRE:
addrconf_gre_config(dev);
break;
@@ -2749,11 +2904,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
struct hlist_head *h = &inet6_addr_lst[i];
- struct hlist_node *n;
spin_lock_bh(&addrconf_hash_lock);
restart:
- hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
+ hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
addrconf_del_timer(ifa);
@@ -2843,7 +2997,7 @@ static void addrconf_rs_timer(unsigned long data)
if (idev->dead || !(idev->if_flags & IF_READY))
goto out;
- if (idev->cnf.forwarding)
+ if (!ipv6_accept_ra(idev))
goto out;
/* Announcement received after solicitation was sent */
@@ -3005,8 +3159,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
router advertisements, start sending router solicitations.
*/
- if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) ||
- ifp->idev->cnf.accept_ra == 2) &&
+ if (ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits > 0 &&
(dev->flags&IFF_LOOPBACK) == 0 &&
(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
@@ -3061,8 +3214,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- struct hlist_node *n;
- hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -3087,9 +3239,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
{
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct hlist_node *n = &ifa->addr_lst;
- hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) {
+ hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
state->offset++;
@@ -3098,7 +3249,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
while (++state->bucket < IN6_ADDR_HSIZE) {
state->offset = 0;
- hlist_for_each_entry_rcu_bh(ifa, n,
+ hlist_for_each_entry_rcu_bh(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -3168,14 +3319,14 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
+ if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
static void __net_exit if6_proc_net_exit(struct net *net)
{
- proc_net_remove(net, "if_inet6");
+ remove_proc_entry("if_inet6", net->proc_net);
}
static struct pernet_operations if6_proc_net_ops = {
@@ -3194,17 +3345,16 @@ void if6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Check if address is a home address configured on any interface. */
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
{
int ret = 0;
struct inet6_ifaddr *ifp = NULL;
- struct hlist_node *n;
unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3226,7 +3376,6 @@ static void addrconf_verify(unsigned long foo)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
int i;
rcu_read_lock_bh();
@@ -3238,7 +3387,7 @@ static void addrconf_verify(unsigned long foo)
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp, node,
+ hlist_for_each_entry_rcu_bh(ifp,
&inet6_addr_lst[i], addr_lst) {
unsigned long age;
@@ -3709,7 +3858,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
- struct hlist_node *node;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
@@ -3719,7 +3867,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
if (h > s_h || idx > s_idx)
@@ -3892,6 +4040,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+ array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
}
static inline size_t inet6_ifla6_size(void)
@@ -4064,7 +4213,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
- struct hlist_node *node;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -4073,7 +4221,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
idev = __in6_dev_get(dev);
@@ -4560,6 +4708,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "ndisc_notify",
+ .data = &ipv6_devconf.ndisc_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
@@ -4784,6 +4939,8 @@ int __init addrconf_init(void)
inet6_dump_ifmcaddr, NULL);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
inet6_dump_ifacaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
+ NULL, NULL);
ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ff76eecfd622..aad64352cb60 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
const struct in6_addr *addr,
int type, int ifindex)
{
- struct hlist_node *pos;
struct ip6addrlbl_entry *p;
- hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (__ip6addrlbl_match(net, p, addr, type, ifindex))
return p;
}
@@ -261,9 +260,9 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
if (hlist_empty(&ip6addrlbl_table.head)) {
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
} else {
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
struct ip6addrlbl_entry *p = NULL;
- hlist_for_each_entry_safe(p, pos, n,
+ hlist_for_each_entry_safe(p, n,
&ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
@@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net,
int ifindex)
{
struct ip6addrlbl_entry *p = NULL;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
int ret = -ESRCH;
ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
__func__, prefix, prefixlen, ifindex);
- hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
@@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
static void __net_exit ip6addrlbl_net_exit(struct net *net)
{
struct ip6addrlbl_entry *p = NULL;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
/* Remove all labels belonging to the exiting net */
spin_lock(&ip6addrlbl_table.lock);
- hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
if (net_eq(ip6addrlbl_net(p), net)) {
hlist_del_rcu(&p->list);
ip6addrlbl_put(p);
@@ -505,12 +504,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
- struct hlist_node *pos;
int idx = 0, s_idx = cb->args[0];
int err;
rcu_read_lock();
- hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
if ((err = ip6addrlbl_fill(skb, p,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a974247a9ae4..6b793bfc0e10 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -160,7 +160,8 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -282,7 +283,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
@@ -699,249 +700,9 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
-static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
-{
- const struct inet6_protocol *ops = NULL;
-
- for (;;) {
- struct ipv6_opt_hdr *opth;
- int len;
-
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_protos[proto]);
-
- if (unlikely(!ops))
- break;
-
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
-
- if (unlikely(!pskb_may_pull(skb, 8)))
- break;
-
- opth = (void *)skb->data;
- len = ipv6_optlen(opth);
-
- if (unlikely(!pskb_may_pull(skb, len)))
- break;
-
- proto = opth->nexthdr;
- __skb_pull(skb, len);
- }
-
- return proto;
-}
-
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[
- ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
- if (likely(ops && ops->gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->gso_send_check(skb);
- }
- rcu_read_unlock();
-
-out:
- return err;
-}
-
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int proto;
- struct frag_hdr *fptr;
- unsigned int unfrag_ip6hlen;
- u8 *prevhdr;
- int offset = 0;
-
- if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
- if (unlikely(skb_shinfo(skb)->gso_type &
- ~(SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_TCPV6 |
- 0)))
- goto out;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- segs = ERR_PTR(-EPROTONOSUPPORT);
-
- proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[proto]);
- if (likely(ops && ops->gso_segment)) {
- skb_reset_transport_header(skb);
- segs = ops->gso_segment(skb, features);
- }
- rcu_read_unlock();
-
- if (IS_ERR(segs))
- goto out;
-
- for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
- if (proto == IPPROTO_UDP) {
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)(skb_network_header(skb) +
- unfrag_ip6hlen);
- fptr->frag_off = htons(offset);
- if (skb->next != NULL)
- fptr->frag_off |= htons(IP6_MF);
- offset += (ntohs(ipv6h->payload_len) -
- sizeof(struct frag_hdr));
- }
- }
-
-out:
- return segs;
-}
-
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct sk_buff **pp = NULL;
- struct sk_buff *p;
- struct ipv6hdr *iph;
- unsigned int nlen;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int proto;
- __wsum csum;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
-
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
-
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
-
- rcu_read_lock();
- proto = iph->nexthdr;
- ops = rcu_dereference(inet6_protos[proto]);
- if (!ops || !ops->gro_receive) {
- __pskb_pull(skb, skb_gro_offset(skb));
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
-
- ops = rcu_dereference(inet6_protos[proto]);
- if (!ops || !ops->gro_receive)
- goto out_unlock;
-
- iph = ipv6_hdr(skb);
- }
-
- NAPI_GRO_CB(skb)->proto = proto;
-
- flush--;
- nlen = skb_network_header_len(skb);
-
- for (p = *head; p; p = p->next) {
- const struct ipv6hdr *iph2;
- __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- iph2 = ipv6_hdr(p);
- first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
-
- /* All fields must match except length and Traffic Class. */
- if (nlen != skb_network_header_len(p) ||
- (first_word & htonl(0xF00FFFFF)) ||
- memcmp(&iph->nexthdr, &iph2->nexthdr,
- nlen - offsetof(struct ipv6hdr, nexthdr))) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
- NAPI_GRO_CB(p)->flush |= flush;
- }
-
- NAPI_GRO_CB(skb)->flush |= flush;
-
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
-
- pp = ops->gro_receive(head, skb);
-
- skb->csum = csum;
-
-out_unlock:
- rcu_read_unlock();
-
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-static int ipv6_gro_complete(struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct ipv6hdr *iph = ipv6_hdr(skb);
- int err = -ENOSYS;
-
- iph->payload_len = htons(skb->len - skb_network_offset(skb) -
- sizeof(*iph));
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]);
- if (WARN_ON(!ops || !ops->gro_complete))
- goto out_unlock;
-
- err = ops->gro_complete(skb);
-
-out_unlock:
- rcu_read_unlock();
-
- return err;
-}
-
static struct packet_type ipv6_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.func = ipv6_rcv,
- .gso_send_check = ipv6_gso_send_check,
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
};
static int __init ipv6_packet_init(void)
@@ -1050,11 +811,10 @@ static struct pernet_operations inet6_net_ops = {
static int __init inet6_init(void)
{
- struct sk_buff *dummy_skb;
struct list_head *r;
int err = 0;
- BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+ BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
/* Register the socket-side information for inet6_create. */
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7e6139508ee7..bb02e176cb70 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -44,7 +44,7 @@
#define IPV6HDR_BASELEN 8
struct tmp_ext {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
struct in6_addr saddr;
#endif
struct in6_addr daddr;
@@ -152,7 +152,7 @@ bad:
return false;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/**
* ipv6_rearrange_destopt - rearrange IPv6 destination options header
* @iph: IPv6 header
@@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(iph_ext, &top_iph->saddr, extlen);
#else
memcpy(iph_ext, &top_iph->daddr, extlen);
@@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, hdr_len);
__skb_pull(skb, ah_hlen + hdr_len);
- skb_set_transport_header(skb, -hdr_len);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -518,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
/* We are going to _remove_ AH header to keep sockets happy,
* so... Later this can change. */
- if (skb_cloned(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
goto out;
skb->ip_summed = CHECKSUM_NONE;
@@ -593,9 +595,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, hdr_len);
- skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + hdr_len);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
+
err = nexthdr;
out_free:
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index cdf02be5f191..5a80f15a9de2 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -47,7 +47,7 @@
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
/* Big ac list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
/*
@@ -64,7 +64,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
int ishost = !net->ipv6.devconf_all->forwarding;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
} else if (ishost) {
err = -EADDRNOTAVAIL;
goto error;
@@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = ipv6_dev_ac_inc(dev, addr);
if (!err) {
- write_lock_bh(&ipv6_sk_ac_lock);
+ spin_lock_bh(&ipv6_sk_ac_lock);
pac->acl_next = np->ipv6_ac_list;
np->ipv6_ac_list = pac;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
pac = NULL;
}
@@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_ac_socklist *pac, *prev_pac;
struct net *net = sock_net(sk);
- write_lock_bh(&ipv6_sk_ac_lock);
+ spin_lock_bh(&ipv6_sk_ac_lock);
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
prev_pac = pac;
}
if (!pac) {
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
return -ENOENT;
}
if (prev_pac)
@@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
@@ -189,10 +189,13 @@ void ipv6_sock_ac_close(struct sock *sk)
struct net *net = sock_net(sk);
int prev_index;
- write_lock_bh(&ipv6_sk_ac_lock);
+ if (!np->ipv6_ac_list)
+ return;
+
+ spin_lock_bh(&ipv6_sk_ac_lock);
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
rcu_read_lock();
@@ -506,7 +509,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -514,7 +517,7 @@ int __net_init ac6_proc_init(struct net *net)
void ac6_proc_exit(struct net *net)
{
- proc_net_remove(net, "anycast6");
+ remove_proc_entry("anycast6", net->proc_net);
}
#endif
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index be2b67d631e5..f5a54782a340 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -30,6 +30,7 @@
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/tcp_states.h>
+#include <net/dsfield.h>
#include <linux/errqueue.h>
#include <asm/uaccess.h>
@@ -356,12 +357,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_port = serr->port;
sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
- sin->sin6_addr =
- *(struct in6_addr *)(nh + serr->addr_offset);
+ const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
+ struct ipv6hdr, daddr);
+ sin->sin6_addr = ip6h->daddr;
if (np->sndflow)
- sin->sin6_flowinfo =
- (*(__be32 *)(nh + serr->addr_offset - 24) &
- IPV6_FLOWINFO_MASK);
+ sin->sin6_flowinfo = ip6_flowinfo(ip6h);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
@@ -380,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_ctl(sk, msg, skb);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
@@ -468,7 +468,8 @@ out:
}
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -488,13 +489,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
}
if (np->rxopt.bits.rxtclass) {
- int tclass = ipv6_tclass(ipv6_hdr(skb));
+ int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
- if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
- __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
- put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+ if (np->rxopt.bits.rxflow) {
+ __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh);
+ if (flowinfo)
+ put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
}
/* HbH is allowed only once */
@@ -597,11 +599,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
}
return 0;
}
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
-int datagram_send_ctl(struct net *net, struct sock *sk,
- struct msghdr *msg, struct flowi6 *fl6,
- struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag)
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+ struct msghdr *msg, struct flowi6 *fl6,
+ struct ipv6_txoptions *opt,
+ int *hlimit, int *tclass, int *dontfrag)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
@@ -701,7 +704,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -721,7 +724,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -746,7 +749,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -769,7 +772,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1) {
@@ -871,4 +874,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
exit_f:
return err;
}
-EXPORT_SYMBOL_GPL(datagram_send_ctl);
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 282f3723ee19..40ffd72243a4 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
pskb_trim(skb, skb->len - alen - padlen - 2);
__skb_pull(skb, hlen);
- skb_set_transport_header(skb, -hdr_len);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
err = nexthdr[1];
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fa3d9c328092..07a7d65a7cb6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,56 +43,12 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/xfrm.h>
#endif
#include <asm/uaccess.h>
-int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
-{
- const unsigned char *nh = skb_network_header(skb);
- int packet_len = skb->tail - skb->network_header;
- struct ipv6_opt_hdr *hdr;
- int len;
-
- if (offset + 2 > packet_len)
- goto bad;
- hdr = (struct ipv6_opt_hdr *)(nh + offset);
- len = ((hdr->hdrlen + 1) << 3);
-
- if (offset + len > packet_len)
- goto bad;
-
- offset += 2;
- len -= 2;
-
- while (len > 0) {
- int opttype = nh[offset];
- int optlen;
-
- if (opttype == type)
- return offset;
-
- switch (opttype) {
- case IPV6_TLV_PAD1:
- optlen = 1;
- break;
- default:
- optlen = nh[offset + 1] + 2;
- if (optlen > len)
- goto bad;
- break;
- }
- offset += optlen;
- len -= optlen;
- }
- /* not_found */
- bad:
- return -1;
-}
-EXPORT_SYMBOL_GPL(ipv6_find_tlv);
-
/*
* Parsing tlv encoded headers.
*
@@ -224,7 +180,7 @@ bad:
Destination options header.
*****************************/
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
struct ipv6_destopt_hao *hao;
@@ -288,7 +244,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
#endif
static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
{
.type = IPV6_TLV_HAO,
.func = ipv6_dest_hao,
@@ -300,7 +256,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
__u16 dstbuf;
#endif
struct dst_entry *dst = skb_dst(skb);
@@ -315,14 +271,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
}
opt->lastopt = opt->dst1 = skb_network_header_len(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
dstbuf = opt->dst1;
#endif
if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
opt->nhoff = dstbuf;
#else
opt->nhoff = opt->dst1;
@@ -378,7 +334,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
looped_back:
if (hdr->segments_left == 0) {
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
/* Silently discard type 2 header unless it was
* processed by own
@@ -404,7 +360,7 @@ looped_back:
}
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (accept_source_route < 0)
goto unknown_rh;
@@ -461,7 +417,7 @@ looped_back:
addr += i - 1;
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
@@ -528,12 +484,12 @@ unknown_rh:
static const struct inet6_protocol rthdr_protocol = {
.handler = ipv6_rthdr_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol destopt_protocol = {
.handler = ipv6_destopt_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol nodata_protocol = {
@@ -559,10 +515,10 @@ int __init ipv6_exthdrs_init(void)
out:
return ret;
-out_rthdr:
- inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
out_destopt:
inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+out_rthdr:
+ inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
goto out;
};
@@ -597,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
const unsigned char *nh = skb_network_header(skb);
if (nh[optoff + 1] == 2) {
- IP6CB(skb)->ra = optoff;
+ IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+ memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
return true;
}
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index f73d59a14131..c5e83fae4df4 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -111,3 +111,171 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
return start;
}
EXPORT_SYMBOL(ipv6_skip_exthdr);
+
+/*
+ * ipv6_find_tlv - locate a TLV option inside an IPv6 options header.
+ * @skb:    packet to search; all offsets are relative to
+ *          skb_network_header(skb)
+ * @offset: offset of the options header to scan
+ * @type:   option type to look for
+ *
+ * Returns the offset of the first option whose type byte equals @type, or
+ * -1 when the header does not fit in the packet, an option overruns the
+ * header, or no matching option exists.
+ */
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb->tail - skb->network_header;
+	struct ipv6_opt_hdr *hdr;
+	int len;
+
+	if (offset + 2 > packet_len)
+		goto bad;
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
+	len = ((hdr->hdrlen + 1) << 3);
+
+	if (offset + len > packet_len)
+		goto bad;
+
+	/* Step over the two leading bytes of the options header. */
+	offset += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int opttype = nh[offset];
+		int optlen;
+
+		if (opttype == type)
+			return offset;
+
+		switch (opttype) {
+		case IPV6_TLV_PAD1:
+			optlen = 1;
+			break;
+		default:
+			/*
+			 * The option length byte must itself lie inside the
+			 * header; with a single byte left, reading it would
+			 * touch memory past the validated region.
+			 */
+			if (len < 2)
+				goto bad;
+			optlen = nh[offset + 1] + 2;
+			if (optlen > len)
+				goto bad;
+			break;
+		}
+		offset += optlen;
+		len -= optlen;
+	}
+	/* not_found */
+ bad:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * Note that *offset is used as input/output parameter, and if it is not zero,
+ * then it must be a valid offset to an inner IPv6 header. This can be used
+ * to explore inner IPv6 header, eg. ICMPv6 error messages.
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return a negative errno: -ENOENT if the header cannot be
+ * reached (absent, or hidden behind a non-first fragment), -EBADMSG if a
+ * header cannot be read from the packet or the inner header is not IPv6.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
+ *
+ * if flags is not NULL and it's a fragment, then the frag flag
+ * IP6_FH_F_FRAG will be set. If it's an AH header, the
+ * IP6_FH_F_AUTH flag is set and target < 0, then this function will
+ * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
+ * function will skip all those routing headers, where segments_left was 0.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff, int *flags)
+{
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	bool found;
+
+	if (fragoff)
+		*fragoff = 0;
+
+	/* A non-zero *offset selects an inner IPv6 header as starting point. */
+	if (*offset) {
+		struct ipv6hdr _ip6, *ip6;
+
+		ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
+		if (!ip6 || (ip6->version != 6)) {
+			printk(KERN_ERR "IPv6 header not found\n");
+			return -EBADMSG;
+		}
+		start = *offset + sizeof(struct ipv6hdr);
+		nexthdr = ip6->nexthdr;
+	}
+
+	do {
+		struct ipv6_opt_hdr _hdr, *hp;
+		unsigned int hdrlen;
+		found = (nexthdr == target);
+
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			/*
+			 * End of the extension header chain. This is a hit
+			 * either when the caller asked for the last header
+			 * (target < 0) or when the header reached is exactly
+			 * the requested one; only otherwise is it a miss.
+			 */
+			if (target < 0 || found)
+				break;
+			return -ENOENT;
+		}
+
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -EBADMSG;
+
+		if (nexthdr == NEXTHDR_ROUTING) {
+			struct ipv6_rt_hdr _rh, *rh;
+
+			rh = skb_header_pointer(skb, start, sizeof(_rh),
+						&_rh);
+			if (rh == NULL)
+				return -EBADMSG;
+
+			/* Exhausted routing headers are skipped on request. */
+			if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
+			    rh->segments_left == 0)
+				found = false;
+		}
+
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off;
+			__be16 *fp;
+
+			if (flags)	/* Indicate that this is a fragment */
+				*flags |= IP6_FH_F_FRAG;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -EBADMSG;
+
+			/* Mask off the low three bits to keep the offset only. */
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     hp->nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
+				return -ENOENT;
+			}
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH) {
+			if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
+				break;
+			/* AH length is computed in 4-byte units, unlike the others. */
+			hdrlen = (hp->hdrlen + 2) << 2;
+		} else
+			hdrlen = ipv6_optlen(hp);
+
+		if (!found) {
+			nexthdr = hp->nexthdr;
+			start += hdrlen;
+		}
+	} while (!found);
+
+	*offset = start;
+	return nexthdr;
+}
+EXPORT_SYMBOL(ipv6_find_hdr);
+
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
new file mode 100644
index 000000000000..cf77f3abfd06
--- /dev/null
+++ b/net/ipv6/exthdrs_offload.c
@@ -0,0 +1,41 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * IPV6 Extension Header GSO/GRO support
+ */
+#include <net/protocol.h>
+#include "ip6_offload.h"
+
+/*
+ * Offload descriptor registered for IPPROTO_ROUTING by
+ * ipv6_exthdrs_offload_init(). Only the flag is set; no other members are
+ * initialised here.
+ * NOTE(review): presumably the GSO/GRO header walker uses
+ * INET6_PROTO_GSO_EXTHDR to step over this header -- confirm against the
+ * offload code.
+ */
+static const struct net_offload rthdr_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+/* Same descriptor shape, registered for IPPROTO_DSTOPTS. */
+static const struct net_offload dstopt_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+/*
+ * Register the offload descriptors for the Routing and Destination Options
+ * extension headers.
+ *
+ * Returns the result of the failing inet6_add_offload() call, or the result
+ * of the last call when both succeed (expected to be 0 by kernel convention
+ * -- confirm against inet6_add_offload()). If the second registration
+ * fails, the first one is rolled back so nothing is left half-registered.
+ */
+int __init ipv6_exthdrs_offload_init(void)
+{
+	int ret;
+
+	ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+	if (ret)
+		goto out;
+
+	ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+	if (ret)
+		goto out_rt;
+
+out:
+	return ret;
+
+out_rt:
+	/*
+	 * The entry was added with inet6_add_offload(), so it has to be
+	 * removed with its inet6_ counterpart, not the IPv4 helper.
+	 */
+	inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+	goto out;
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d9fb9110f607..2e1a432867c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
again:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
rt = NULL;
goto out;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 24d69dbca4d6..fff5bdd8b680 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net)
return net->ipv6.icmp_sk[smp_processor_id()];
}
+/*
+ * Error handler installed as icmpv6_protocol.err_handler.
+ *
+ * Packet Too Big is handed to ip6_update_pmtu() together with @info, and
+ * Redirect is handed to ip6_redirect(); every other type is ignored.
+ * @opt, @code and @offset are not used.
+ */
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		       u8 type, u8 code, int offset, __be32 info)
+{
+	struct net *ns = dev_net(skb->dev);
+
+	switch (type) {
+	case ICMPV6_PKT_TOOBIG:
+		ip6_update_pmtu(skb, ns, info, 0, 0);
+		break;
+	case NDISC_REDIRECT:
+		ip6_redirect(skb, ns, 0, 0);
+		break;
+	default:
+		break;
+	}
+}
+
static int icmpv6_rcv(struct sk_buff *skb);
static const struct inet6_protocol icmpv6_protocol = {
.handler = icmpv6_rcv,
+ .err_handler = icmpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -280,7 +292,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
return 0;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c4f934176cab..9bfab19ff3c0 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -31,25 +31,33 @@ int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
{
const struct sock *sk2;
- const struct hlist_node *node;
+ int reuse = sk->sk_reuse;
+ int reuseport = sk->sk_reuseport;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
/* We must walk the whole port owner list in this case. -DaveM */
/*
* See comment in inet_csk_bind_conflict about sock lookup
* vs net namespaces issues.
*/
- sk_for_each_bound(sk2, node, &tb->owners) {
+ sk_for_each_bound(sk2, &tb->owners) {
if (sk != sk2 &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
- (!sk->sk_reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- ipv6_rcv_saddr_equal(sk, sk2))
- break;
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ if ((!reuse || !sk2->sk_reuse ||
+ sk2->sk_state == TCP_LISTEN) &&
+ (!reuseport || !sk2->sk_reuseport ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid,
+ sock_i_uid((struct sock *)sk2))))) {
+ if (ipv6_rcv_saddr_equal(sk, sk2))
+ break;
+ }
+ }
}
- return node != NULL;
+ return sk2 != NULL;
}
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
@@ -252,6 +260,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
return NULL;
dst->ops->update_pmtu(dst, sk, NULL, mtu);
- return inet6_csk_route_socket(sk, &fl6);
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
}
EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 73f1a00a96af..32b4a1675d82 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -87,11 +87,13 @@ struct sock *__inet6_lookup_established(struct net *net,
rcu_read_lock();
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- /* For IPV6 do the cheaper port and family tests first. */
- if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (sk->sk_hash != hash)
+ continue;
+ if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto begintw;
- if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
+ ports, dif))) {
sock_put(sk);
goto begin;
}
@@ -104,12 +106,16 @@ begin:
begintw:
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (sk->sk_hash != hash)
+ continue;
+ if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
+ ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
sk = NULL;
goto out;
}
- if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
+ ports, dif))) {
sock_put(sk);
goto begintw;
}
@@ -152,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
+ struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
struct sock *sk;
const struct hlist_nulls_node *node;
struct sock *result;
- int score, hiscore;
+ int score, hiscore, matches = 0, reuseport = 0;
+ u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
rcu_read_lock();
begin:
result = NULL;
- hiscore = -1;
+ hiscore = 0;
sk_nulls_for_each(sk, node, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif);
if (score > hiscore) {
hiscore = score;
result = sk;
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ }
+ } else if (score == hiscore && reuseport) {
+ matches++;
+ if (((u64)phash * matches) >> 32 == 0)
+ result = sk;
+ phash = next_pseudo_random32(phash);
}
}
/*
@@ -236,9 +255,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
/* Check TIME-WAIT sockets first. */
sk_nulls_for_each(sk2, node, &head->twchain) {
- tw = inet_twsk(sk2);
+ if (sk2->sk_hash != hash)
+ continue;
- if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
+ if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
+ ports, dif))) {
+ tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
goto unique;
else
@@ -249,7 +271,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
/* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) {
- if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+ if (sk2->sk_hash != hash)
+ continue;
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
goto not_unique;
}
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
new file mode 100644
index 000000000000..72d198b8e4d2
--- /dev/null
+++ b/net/ipv6/ip6_checksum.c
@@ -0,0 +1,97 @@
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <asm/checksum.h>
+
+#ifndef _HAVE_ARCH_IPV6_CSUM /* generic version; compiled only when this macro is not defined */
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, unsigned short proto,
+ __wsum csum)
+{
+ /* Add saddr, daddr, len and proto into csum with end-around carry, then fold the result. */
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+ /* Source address words: after each add, (sum < addend) detects the wrap and the carry is re-added. */
+ sum += (__force u32)saddr->s6_addr32[0];
+ carry = (sum < (__force u32)saddr->s6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[1];
+ carry = (sum < (__force u32)saddr->s6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[2];
+ carry = (sum < (__force u32)saddr->s6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[3];
+ carry = (sum < (__force u32)saddr->s6_addr32[3]);
+ sum += carry;
+ /* Destination address words, same carry handling. */
+ sum += (__force u32)daddr->s6_addr32[0];
+ carry = (sum < (__force u32)daddr->s6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[1];
+ carry = (sum < (__force u32)daddr->s6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[2];
+ carry = (sum < (__force u32)daddr->s6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[3];
+ carry = (sum < (__force u32)daddr->s6_addr32[3]);
+ sum += carry;
+ /* len, converted with htonl(). */
+ ulen = (__force u32)htonl((__u32) len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+ /* proto, widened to 32 bits and converted with htonl(). */
+ uproto = (__force u32)htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+ /* csum_fold() produces the __sum16 that is returned. */
+ return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif
+
+int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
+{
+ int err;
+ /* Prepare checksum state for a UDP/UDP-Lite skb: returns 0 on success, 1 for a zero checksum, else the udplite error. */
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+ /* UDP-Lite runs udplite_checksum_init() first; a non-zero result is returned unchanged. */
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
+ if (uh->check == 0) {
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ this error (zero checksum field), so log it and return 1.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+ return 1;
+ }
+ if (skb->ip_summed == CHECKSUM_COMPLETE &&
+ !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->len, proto, skb->csum))
+ skb->ip_summed = CHECKSUM_UNNECESSARY; /* the supplied skb->csum already folds to zero with the pseudo-header */
+ /* Otherwise seed skb->csum with the complemented, unfolded pseudo-header sum. */
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len, proto, 0));
+
+ return 0;
+}
+EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 24995a93ef8c..192dd1a0e188 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
struct fib6_table *tb;
struct hlist_head *head;
- struct hlist_node *node;
unsigned int h;
if (id == 0)
@@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
h = id & (FIB6_TABLE_HASHSZ - 1);
rcu_read_lock();
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (tb->tb6_id == id) {
rcu_read_unlock();
return tb;
@@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct rt6_rtnl_dump_arg arg;
struct fib6_walker_t *w;
struct fib6_table *tb;
- struct hlist_node *node;
struct hlist_head *head;
int res = 0;
@@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
res = fib6_dump_table(tb, skb, cb);
@@ -672,6 +670,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
iter->rt6i_idev == rt->rt6i_idev &&
ipv6_addr_equal(&iter->rt6i_gateway,
&rt->rt6i_gateway)) {
+ if (rt->rt6i_nsiblings)
+ rt->rt6i_nsiblings = 0;
if (!(iter->rt6i_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +680,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_set_expires(iter, rt->dst.expires);
return -EEXIST;
}
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid a long list, we only add siblings if the
+ * route has a gateway.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY &&
+ !(rt->rt6i_flags & RTF_EXPIRES) &&
+ !(iter->rt6i_flags & RTF_EXPIRES))
+ rt->rt6i_nsiblings++;
}
if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +707,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
+ /* Link this route to its sibling routes. */
+ if (rt->rt6i_nsiblings) {
+ unsigned int rt6i_nsiblings;
+ struct rt6_info *sibling, *temp_sibling;
+
+ /* Find the first route that has the same metric */
+ sibling = fn->leaf;
+ while (sibling) {
+ if (sibling->rt6i_metric == rt->rt6i_metric) {
+ list_add_tail(&rt->rt6i_siblings,
+ &sibling->rt6i_siblings);
+ break;
+ }
+ sibling = sibling->dst.rt6_next;
+ }
+ /* For each sibling in the list, increment the counter of
+ * siblings. BUG() if the counters do not match: the list of
+ * siblings is broken!
+ */
+ rt6i_nsiblings = 0;
+ list_for_each_entry_safe(sibling, temp_sibling,
+ &rt->rt6i_siblings, rt6i_siblings) {
+ sibling->rt6i_nsiblings++;
+ BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6i_nsiblings++;
+ }
+ BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ }
+
/*
* insert node
*/
@@ -1193,6 +1237,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (fn->rr_ptr == rt)
fn->rr_ptr = NULL;
+ /* Remove this entry from other siblings */
+ if (rt->rt6i_nsiblings) {
+ struct rt6_info *sibling, *next_sibling;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->rt6i_siblings, rt6i_siblings)
+ sibling->rt6i_nsiblings--;
+ rt->rt6i_nsiblings = 0;
+ list_del_init(&rt->rt6i_siblings);
+ }
+
/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
@@ -1463,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg
int prune, void *arg)
{
struct fib6_table *table;
- struct hlist_node *node;
struct hlist_head *head;
unsigned int h;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
read_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
func, prune, arg);
@@ -1483,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
int prune, void *arg)
{
struct fib6_table *table;
- struct hlist_node *node;
struct hlist_head *head;
unsigned int h;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
func, prune, arg);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 90bbefb57943..b973ed3d06cf 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -51,25 +51,38 @@
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
static atomic_t fl_size = ATOMIC_INIT(0);
-static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(unsigned long dummy);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
/* FL hash table lock: it protects only of GC */
-static DEFINE_RWLOCK(ip6_fl_lock);
+static DEFINE_SPINLOCK(ip6_fl_lock);
/* Big socket sock */
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
+#define for_each_fl_rcu(hash, fl) /* walk bucket fl_ht[hash], loading each link with rcu_dereference_bh() */ \
+ for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
+ fl != NULL; \
+ fl = rcu_dereference_bh(fl->next))
+#define for_each_fl_continue_rcu(fl) /* same walk, resuming at the entry after the current fl */ \
+ for (fl = rcu_dereference_bh(fl->next); \
+ fl != NULL; \
+ fl = rcu_dereference_bh(fl->next))
+ /* Walk the list starting at np->ipv6_fl_list, loading each link with rcu_dereference_bh(). */
+#define for_each_sk_fl_rcu(np, sfl) \
+ for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \
+ sfl != NULL; \
+ sfl = rcu_dereference_bh(sfl->next))
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
- for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
+ for_each_fl_rcu(FL_HASH(label), fl) {
if (fl->label == label && net_eq(fl->fl_net, net))
return fl;
}
@@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
- read_lock_bh(&ip6_fl_lock);
+ rcu_read_lock_bh();
fl = __fl_lookup(net, label);
- if (fl)
- atomic_inc(&fl->users);
- read_unlock_bh(&ip6_fl_lock);
+ if (fl && !atomic_inc_not_zero(&fl->users))
+ fl = NULL;
+ rcu_read_unlock_bh();
return fl;
}
@@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl)
put_pid(fl->owner.pid);
release_net(fl->fl_net);
kfree(fl->opt);
+ kfree_rcu(fl, rcu);
}
- kfree(fl);
}
static void fl_release(struct ip6_flowlabel *fl)
{
- write_lock_bh(&ip6_fl_lock);
+ spin_lock_bh(&ip6_fl_lock);
fl->lastuse = jiffies;
if (atomic_dec_and_test(&fl->users)) {
@@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl)
time_after(ip6_fl_gc_timer.expires, ttd))
mod_timer(&ip6_fl_gc_timer, ttd);
}
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
}
static void ip6_fl_gc(unsigned long dummy)
@@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy)
unsigned long now = jiffies;
unsigned long sched = 0;
- write_lock(&ip6_fl_lock);
+ spin_lock(&ip6_fl_lock);
for (i=0; i<=FL_HASH_MASK; i++) {
struct ip6_flowlabel *fl, **flp;
flp = &fl_ht[i];
- while ((fl=*flp) != NULL) {
+ while ((fl = rcu_dereference_protected(*flp,
+ lockdep_is_held(&ip6_fl_lock))) != NULL) {
if (atomic_read(&fl->users) == 0) {
unsigned long ttd = fl->lastuse + fl->linger;
if (time_after(ttd, fl->expires))
@@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy)
if (sched) {
mod_timer(&ip6_fl_gc_timer, sched);
}
- write_unlock(&ip6_fl_lock);
+ spin_unlock(&ip6_fl_lock);
}
static void __net_exit ip6_fl_purge(struct net *net)
{
int i;
- write_lock(&ip6_fl_lock);
+ spin_lock(&ip6_fl_lock);
for (i = 0; i <= FL_HASH_MASK; i++) {
struct ip6_flowlabel *fl, **flp;
flp = &fl_ht[i];
- while ((fl = *flp) != NULL) {
+ while ((fl = rcu_dereference_protected(*flp,
+ lockdep_is_held(&ip6_fl_lock))) != NULL) {
if (net_eq(fl->fl_net, net) &&
atomic_read(&fl->users) == 0) {
*flp = fl->next;
@@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
flp = &fl->next;
}
}
- write_unlock(&ip6_fl_lock);
+ spin_unlock(&ip6_fl_lock);
}
static struct ip6_flowlabel *fl_intern(struct net *net,
@@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
fl->label = label & IPV6_FLOWLABEL_MASK;
- write_lock_bh(&ip6_fl_lock);
+ spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
@@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
lfl = __fl_lookup(net, fl->label);
if (lfl != NULL) {
atomic_inc(&lfl->users);
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
return lfl;
}
}
fl->lastuse = jiffies;
fl->next = fl_ht[FL_HASH(fl->label)];
- fl_ht[FL_HASH(fl->label)] = fl;
+ rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
atomic_inc(&fl_size);
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
return NULL;
}
@@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
label &= IPV6_FLOWLABEL_MASK;
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
struct ip6_flowlabel *fl = sfl->fl;
if (fl->label == label) {
fl->lastuse = jiffies;
atomic_inc(&fl->users);
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return fl;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_fl_socklist *sfl;
- while ((sfl = np->ipv6_fl_list) != NULL) {
+ if (!rcu_access_pointer(np->ipv6_fl_list))
+ return;
+
+ spin_lock_bh(&ip6_sk_fl_lock);
+ while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
np->ipv6_fl_list = sfl->next;
+ spin_unlock_bh(&ip6_sk_fl_lock);
+
fl_release(sfl->fl);
- kfree(sfl);
+ kfree_rcu(sfl, rcu);
+
+ spin_lock_bh(&ip6_sk_fl_lock);
}
+ spin_unlock_bh(&ip6_sk_fl_lock);
}
/* Service routines */
@@ -365,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
msg.msg_control = (void*)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
- err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
- &junk, &junk);
+ err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
+ &junk, &junk, &junk);
if (err)
goto done;
err = -EINVAL;
@@ -424,7 +449,7 @@ static int mem_check(struct sock *sk)
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+ for_each_sk_fl_rcu(np, sfl)
count++;
if (room <= 0 ||
@@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
struct ip6_flowlabel *fl)
{
- write_lock_bh(&ip6_sk_fl_lock);
+ spin_lock_bh(&ip6_sk_fl_lock);
sfl->fl = fl;
sfl->next = np->ipv6_fl_list;
- np->ipv6_fl_list = sfl;
- write_unlock_bh(&ip6_sk_fl_lock);
+ rcu_assign_pointer(np->ipv6_fl_list, sfl);
+ spin_unlock_bh(&ip6_sk_fl_lock);
}
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
@@ -493,33 +518,36 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
switch (freq.flr_action) {
case IPV6_FL_A_PUT:
- write_lock_bh(&ip6_sk_fl_lock);
- for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+ spin_lock_bh(&ip6_sk_fl_lock);
+ for (sflp = &np->ipv6_fl_list;
+ (sfl = rcu_dereference(*sflp))!=NULL;
+ sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = sfl->next;
- write_unlock_bh(&ip6_sk_fl_lock);
+ *sflp = rcu_dereference(sfl->next);
+ spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
- kfree(sfl);
+ kfree_rcu(sfl, rcu);
return 0;
}
}
- write_unlock_bh(&ip6_sk_fl_lock);
+ spin_unlock_bh(&ip6_sk_fl_lock);
return -ESRCH;
case IPV6_FL_A_RENEW:
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
if (sfl->fl->label == freq.flr_label) {
err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return err;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
- if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+ if (freq.flr_share == IPV6_FL_S_NONE &&
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
fl = fl_lookup(net, freq.flr_label);
if (fl) {
err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
@@ -540,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label) {
err = -EEXIST;
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_flags&IPV6_FL_F_EXCL) {
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
goto done;
}
fl1 = sfl->fl;
@@ -552,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
break;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
if (fl1 == NULL)
fl1 = fl_lookup(net, freq.flr_label);
@@ -640,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
- fl = fl_ht[state->bucket];
-
- while (fl && !net_eq(fl->fl_net, net))
- fl = fl->next;
- if (fl)
- break;
+ for_each_fl_rcu(state->bucket, fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
}
+ fl = NULL;
+out:
return fl;
}
@@ -655,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
struct net *net = seq_file_net(seq);
- fl = fl->next;
+ for_each_fl_continue_rcu(fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
+
try_again:
- while (fl && !net_eq(fl->fl_net, net))
- fl = fl->next;
-
- while (!fl) {
- if (++state->bucket <= FL_HASH_MASK) {
- fl = fl_ht[state->bucket];
- goto try_again;
- } else
- break;
+ if (++state->bucket <= FL_HASH_MASK) {
+ for_each_fl_rcu(state->bucket, fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
+ goto try_again;
}
+ fl = NULL;
+
+out:
return fl;
}
@@ -680,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
}
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(ip6_fl_lock)
+ __acquires(RCU)
{
- read_lock_bh(&ip6_fl_lock);
+ rcu_read_lock_bh();
return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
@@ -699,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void ip6fl_seq_stop(struct seq_file *seq, void *v)
- __releases(ip6_fl_lock)
+ __releases(RCU)
{
- read_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock_bh();
}
static int ip6fl_seq_show(struct seq_file *seq, void *v)
@@ -774,15 +806,15 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_net_fops_create(net, "ip6_flowlabel",
- S_IRUGO, &ip6fl_seq_fops))
+ if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ &ip6fl_seq_fops))
return -ENOMEM;
return 0;
}
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
- proc_net_remove(net, "ip6_flowlabel");
+ remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0185679c5f53..e4efffe2522e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -109,21 +109,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
#define tunnels_r tunnels[2]
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
@@ -181,7 +166,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
- for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
!ipv6_addr_equal(remote, &t->parms.raddr) ||
key != t->parms.i_key ||
@@ -206,7 +191,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
@@ -230,7 +215,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
(!ipv6_addr_equal(local, &t->parms.raddr) ||
!ipv6_addr_is_multicast(local))) ||
@@ -256,7 +241,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
if (t->parms.i_key != key ||
!(t->dev->flags & IFF_UP))
continue;
@@ -773,8 +758,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_dst_set_noref(skb, dst);
}
- skb->transport_header = skb->network_header;
-
proto = NEXTHDR_GRE;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
@@ -783,14 +766,13 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_push(skb, gre_hlen);
skb_reset_network_header(skb);
+ skb_set_transport_header(skb, sizeof(*ipv6h));
/*
* Push down and install the IP header.
*/
ipv6h = ipv6_hdr(skb);
- *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
- dsfield = INET_ECN_encapsulate(0, dsfield);
- ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -976,7 +958,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
int ret;
if (!ip6_tnl_xmit_ctl(t))
- return -1;
+ goto tx_err;
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -1069,7 +1051,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
dev->mtu = IPV6_MIN_MTU;
}
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
t->hlen = addend;
@@ -1161,7 +1143,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1209,7 +1191,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == ign->fb_tunnel_dev) {
@@ -1256,7 +1238,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1);
- *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
+ ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
@@ -1633,9 +1615,9 @@ static size_t ip6gre_get_size(const struct net_device *dev)
/* IFLA_GRE_OKEY */
nla_total_size(4) +
/* IFLA_GRE_LOCAL */
- nla_total_size(4) +
+ nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_REMOTE */
- nla_total_size(4) +
+ nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_TTL */
nla_total_size(1) +
/* IFLA_GRE_TOS */
@@ -1659,8 +1641,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
- nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
- nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
/*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a52d864d562b..e33fe0ab2568 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -118,6 +118,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
ipv6_addr_loopback(&hdr->daddr))
goto err;
+ /* RFC4291 2.7
+ * Nodes must not originate a packet to a multicast address whose scope
+ * field contains the reserved value 0; if such a packet is received, it
+ * must be silently dropped.
+ */
+ if (ipv6_addr_is_multicast(&hdr->daddr) &&
+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
+ goto err;
+
/*
* RFC4291 2.7
* Multicast addresses must not be used as source addresses in IPv6
@@ -212,7 +221,7 @@ resubmit:
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
- !ipv6_is_mld(skb, nexthdr))
+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
goto discard;
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
@@ -232,9 +241,11 @@ resubmit:
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
}
- } else
+ kfree_skb(skb);
+ } else {
IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
- kfree_skb(skb);
+ consume_skb(skb);
+ }
}
rcu_read_unlock();
return 0;
@@ -270,7 +281,8 @@ int ip6_mc_input(struct sk_buff *skb)
* IPv6 multicast router mode is now supported ;)
*/
if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
- !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) &&
+ !(ipv6_addr_type(&hdr->daddr) &
+ (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
/*
* Okay, we try to forward - split and duplicate
@@ -280,10 +292,8 @@ int ip6_mc_input(struct sk_buff *skb)
struct inet6_skb_parm *opt = IP6CB(skb);
/* Check for MLD */
- if (unlikely(opt->ra)) {
+ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
/* Check if this is a mld message */
- u8 *ptr = skb_network_header(skb) + opt->ra;
- struct icmp6hdr *icmp6;
u8 nexthdr = hdr->nexthdr;
__be16 frag_off;
int offset;
@@ -291,7 +301,7 @@ int ip6_mc_input(struct sk_buff *skb)
/* Check if the value of Router Alert
* is for MLD (0x0000).
*/
- if ((ptr[2] | ptr[3]) == 0) {
+ if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
deliver = false;
if (!ipv6_ext_hdr(nexthdr)) {
@@ -303,24 +313,10 @@ int ip6_mc_input(struct sk_buff *skb)
if (offset < 0)
goto out;
- if (nexthdr != IPPROTO_ICMPV6)
+ if (!ipv6_is_mld(skb, nexthdr, offset))
goto out;
- if (!pskb_may_pull(skb, (skb_network_header(skb) +
- offset + 1 - skb->data)))
- goto out;
-
- icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
-
- switch (icmp6->icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- deliver = true;
- break;
- }
- goto out;
+ deliver = true;
}
/* unknown RA - process it normally */
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
new file mode 100644
index 000000000000..8234c1dcdf72
--- /dev/null
+++ b/net/ipv6/ip6_offload.c
@@ -0,0 +1,283 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/printk.h>
+
+#include <net/protocol.h>
+#include <net/ipv6.h>
+
+#include "ip6_offload.h"
+
+/*
+ * Pull the extension headers at the front of @skb, starting from the
+ * next-header value @proto.
+ *
+ * A header is pulled when @proto is NEXTHDR_HOP, or when its entry in
+ * inet6_offloads[] carries INET6_PROTO_GSO_EXTHDR.  The loop stops at the
+ * first value that does not qualify, or as soon as pskb_may_pull() fails.
+ *
+ * Returns the next-header value at which pulling stopped.
+ */
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+	const struct net_offload *ops = NULL;
+
+	for (;;) {
+		struct ipv6_opt_hdr *opth;
+		int len;
+
+		/* NEXTHDR_HOP is pulled without consulting inet6_offloads[]. */
+		if (proto != NEXTHDR_HOP) {
+			ops = rcu_dereference(inet6_offloads[proto]);
+
+			if (unlikely(!ops))
+				break;
+
+			if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+				break;
+		}
+
+		/* Require 8 bytes before ipv6_optlen() reads the header. */
+		if (unlikely(!pskb_may_pull(skb, 8)))
+			break;
+
+		opth = (void *)skb->data;
+		len = ipv6_optlen(opth);
+
+		if (unlikely(!pskb_may_pull(skb, len)))
+			break;
+
+		/* pskb_may_pull() may have reallocated the skb head, which
+		 * would leave opth dangling: reload it before reading nexthdr.
+		 */
+		opth = (void *)skb->data;
+		proto = opth->nexthdr;
+		__skb_pull(skb, len);
+	}
+
+	return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int err = -EINVAL;
+ /* .gso_send_check hook: pull the IPv6 and extension headers, then defer to the upper-layer offload. */
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+ /* -EPROTONOSUPPORT is returned when the final protocol has no offload or no gso_send_check. */
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[
+ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+ if (likely(ops && ops->callbacks.gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->callbacks.gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int proto;
+ struct frag_hdr *fptr;
+ unsigned int unfrag_ip6hlen;
+ u8 *prevhdr;
+ int offset = 0;
+ /* .gso_segment hook: check gso_type, pull headers, let the upper-layer offload segment, then fix up each segment. */
+ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+ /* Only the gso_type bits listed here are accepted; any other bit fails with -EINVAL. */
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_GRE |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+ /* Skip extension headers, then look up the offload for the resulting protocol. */
+ proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->callbacks.gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (IS_ERR(segs))
+ goto out;
+ /* Per segment: rewrite payload_len and, for UDP, the fragment header offset and IP6_MF flag. */
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ if (proto == IPPROTO_UDP) {
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ fptr = (struct frag_hdr *)(skb_network_header(skb) +
+ unfrag_ip6hlen);
+ fptr->frag_off = htons(offset);
+ if (skb->next != NULL) /* every segment except the last gets IP6_MF */
+ fptr->frag_off |= htons(IP6_MF);
+ offset += (ntohs(ipv6h->payload_len) -
+ sizeof(struct frag_hdr));
+ }
+ }
+
+out:
+ return segs;
+}
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct ipv6hdr *iph;
+ unsigned int nlen;
+ unsigned int hlen;
+ unsigned int off;
+ int flush = 1;
+ int proto;
+ __wsum csum;
+ /* .gro_receive hook: find the upper-layer offload, clear same_flow on non-matching packets in *head, then delegate. */
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
+
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+ /* flush starts at 1 and is decremented once an offload is found; a payload_len mismatch keeps it non-zero. */
+ flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+ rcu_read_lock();
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) { /* no direct handler: skip extension headers and look again */
+ __pskb_pull(skb, skb_gro_offset(skb));
+ proto = ipv6_gso_pull_exthdrs(skb, proto);
+ skb_gro_pull(skb, -skb_transport_offset(skb));
+ skb_reset_transport_header(skb);
+ __skb_push(skb, skb_gro_offset(skb));
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ iph = ipv6_hdr(skb); /* reloaded after the pull/push sequence above */
+ }
+
+ NAPI_GRO_CB(skb)->proto = proto; /* read back by ipv6_gro_complete() */
+
+ flush--;
+ nlen = skb_network_header_len(skb);
+
+ for (p = *head; p; p = p->next) {
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+ /* XOR of the two first words: set bits mark where version, class or label differ. */
+ iph2 = ipv6_hdr(p);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+
+ /* All fields must match except length and Traffic Class. */
+ if (nlen != skb_network_header_len(p) ||
+ (first_word & htonl(0xF00FFFFF)) ||
+ memcmp(&iph->nexthdr, &iph2->nexthdr,
+ nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= flush;
+ }
+
+ NAPI_GRO_CB(skb)->flush |= flush;
+ /* skb->csum is saved here and restored after the upper-layer gro_receive() call. */
+ csum = skb->csum;
+ skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+ skb->csum = csum;
+
+out_unlock:
+ rcu_read_unlock();
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int err = -ENOSYS;
+ /* .gro_complete hook: rewrite payload_len from the skb length, then call the recorded protocol's gro_complete. */
+ iph->payload_len = htons(skb->len - skb_network_offset(skb) -
+ sizeof(*iph));
+ /* The protocol comes from NAPI_GRO_CB(skb)->proto; a missing handler triggers WARN_ON and returns -ENOSYS. */
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct packet_offload ipv6_packet_offload __read_mostly = { /* GSO/GRO hooks for ETH_P_IPV6; registered in ipv6_offload_init() */
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+};
+
+static int __init ipv6_offload_init(void)
+{
+ /* Registration failures below are only logged with pr_crit(); the function still returns 0. */
+ if (tcpv6_offload_init() < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (udp_offload_init() < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (ipv6_exthdrs_offload_init() < 0)
+ pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
+ /* Register ipv6_packet_offload as the packet-type offload. */
+ dev_add_offload(&ipv6_packet_offload);
+ return 0;
+}
+
+fs_initcall(ipv6_offload_init);
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
new file mode 100644
index 000000000000..2e155c651b35
--- /dev/null
+++ b/net/ipv6/ip6_offload.h
@@ -0,0 +1,18 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ip6_offload_h
+#define __ip6_offload_h
+
+/*
+ * Per-protocol offload init routines. ipv6_offload_init() calls each of
+ * them and treats a negative return value as failure.
+ */
+int ipv6_exthdrs_offload_init(void);
+int udp_offload_init(void);
+int tcpv6_offload_init(void);
+
+#endif /* __ip6_offload_h */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index aece3e792f84..155eccfa7760 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,8 +56,6 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-
int __ip6_local_out(struct sk_buff *skb)
{
int len;
@@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct neighbour *neigh;
- struct rt6_info *rt;
+ struct in6_addr *nexthop;
+ int ret;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb)
IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
skb->len);
+
+ if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
+ IPV6_ADDR_SCOPE_NODELOCAL &&
+ !(dev->flags & IFF_LOOPBACK)) {
+ kfree_skb(skb);
+ return 0;
+ }
}
- rt = (struct rt6_info *) dst;
- neigh = rt->n;
- if (neigh)
- return dst_neigh_output(dst, neigh, skb);
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ if (!IS_ERR(neigh)) {
+ ret = dst_neigh_output(dst, neigh, skb);
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
+ ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
dst->dev, dst_output);
}
- net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
skb->dev = dst->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
@@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
EXPORT_SYMBOL(ip6_xmit);
-/*
- * To avoid extra problems ND packets are send through this
- * routine. It's code duplication but I really want to avoid
- * extra checks since ipv6_build_header is used by TCP (which
- * is for us performance critical)
- */
-
-int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- int proto, int len)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6hdr *hdr;
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->dev = dev;
-
- skb_reset_network_header(skb);
- skb_put(skb, sizeof(struct ipv6hdr));
- hdr = ipv6_hdr(skb);
-
- *(__be32*)hdr = htonl(0x60000000);
-
- hdr->payload_len = htons(len);
- hdr->nexthdr = proto;
- hdr->hop_limit = np->hop_limit;
-
- hdr->saddr = *saddr;
- hdr->daddr = *daddr;
-
- return 0;
-}
-
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
struct ip6_ra_chain *ra;
@@ -538,78 +517,12 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
to->nf_trace = from->nf_trace;
#endif
skb_copy_secmark(to, from);
}
-int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- unsigned int packet_len = skb->tail - skb->network_header;
- int found_rhdr = 0;
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
-
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- break;
-#endif
- if (found_rhdr)
- return offset;
- break;
- default :
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
- offset);
- }
-
- return offset;
-}
-
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
- }
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
-}
-
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -756,7 +669,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return 0;
}
@@ -768,7 +681,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return err;
slow_path_clean:
@@ -979,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry of the nexthop router
*/
rt = (struct rt6_info *) *dst;
- n = rt->n;
- if (n && !(n->nud_state & NUD_VALID)) {
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+ err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
+ rcu_read_unlock_bh();
+
+ if (err) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
int redirect;
@@ -1279,10 +1196,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (dst_allfrag(rt->dst.path))
cork->flags |= IPCORK_ALLFRAG;
cork->length = 0;
- exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+ exthdrlen = (opt ? opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
- dst_exthdrlen = rt->dst.header_len;
+ dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
} else {
rt = (struct rt6_info *)cork->dst;
fl6 = &inet->cork.fl.u.ip6;
@@ -1614,9 +1531,7 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- *(__be32*)hdr = fl6->flowlabel |
- htonl(0x60000000 | ((int)np->cork.tclass << 20));
-
+ ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cb7e2ded6f08..fff83cbc197f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -74,6 +74,10 @@ MODULE_ALIAS_NETDEV("ip6tnl0");
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
@@ -83,6 +87,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops ip6_link_ops __read_mostly;
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
@@ -94,14 +99,6 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu **tnls[2];
};
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
-
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_tstats sum = { 0 };
@@ -258,6 +255,33 @@ static void ip6_dev_free(struct net_device *dev)
free_netdev(dev);
}
+/*
+ * Common tail of tunnel creation, shared by ip6_tnl_create() and the
+ * rtnetlink newlink handler: initialise the tunnel device, register it,
+ * copy the final device name into the tunnel parameters, attach
+ * ip6_link_ops, take a device reference and link the tunnel into the
+ * per-net tables.
+ *
+ * Returns 0 on success, or the negative error from ip6_tnl_dev_init() or
+ * register_netdevice().
+ */
+static int ip6_tnl_create2(struct net_device *dev)
+{
+	struct ip6_tnl *tnl = netdev_priv(dev);
+	struct ip6_tnl_net *ip6n = net_generic(dev_net(dev), ip6_tnl_net_id);
+	int rc;
+
+	rc = ip6_tnl_dev_init(dev);
+	if (rc < 0)
+		return rc;
+
+	rc = register_netdevice(dev);
+	if (rc < 0)
+		return rc;
+
+	/* dev->name is read back only after registration has succeeded */
+	strcpy(tnl->parms.name, dev->name);
+	dev->rtnl_link_ops = &ip6_link_ops;
+
+	dev_hold(dev);
+	ip6_tnl_link(ip6n, tnl);
+	return 0;
+}
+
+
/**
* ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
@@ -276,7 +300,6 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
struct ip6_tnl *t;
char name[IFNAMSIZ];
int err;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (p->name[0])
strlcpy(name, p->name, IFNAMSIZ);
@@ -291,17 +314,10 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
t = netdev_priv(dev);
t->parms = *p;
- err = ip6_tnl_dev_init(dev);
+ err = ip6_tnl_create2(dev);
if (err < 0)
goto failed_free;
- if ((err = register_netdevice(dev)) < 0)
- goto failed_free;
-
- strcpy(t->parms.name, dev->name);
-
- dev_hold(dev);
- ip6_tnl_link(ip6n, t);
return t;
failed_free:
@@ -663,8 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
icmpv6_send(skb2, rel_type, rel_code, rel_info);
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
kfree_skb(skb2);
}
@@ -672,28 +687,26 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
-static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
+static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
- if (INET_ECN_is_ce(dsfield))
- IP_ECN_set_ce(ip_hdr(skb));
+ return IP6_ECN_decapsulate(ipv6h, skb);
}
-static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
+static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
- if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
- IP6_ECN_set_ce(ipv6_hdr(skb));
+ return IP6_ECN_decapsulate(ipv6h, skb);
}
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
@@ -757,12 +770,13 @@ EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
__u8 ipproto,
- void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb))
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
{
struct ip6_tnl *t;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int err;
rcu_read_lock();
@@ -792,14 +806,26 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ __skb_tunnel_rx(skb, t->dev);
+
+ err = dscp_ecn_decapsulate(t, ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++t->dev->stats.rx_frame_errors;
+ ++t->dev->stats.rx_errors;
+ rcu_read_unlock();
+ goto discard;
+ }
+ }
+
tstats = this_cpu_ptr(t->dev->tstats);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
- __skb_tunnel_rx(skb, t->dev);
-
- dscp_ecn_decapsulate(t, ipv6h, skb);
-
netif_rx(skb);
rcu_read_unlock();
@@ -1004,9 +1030,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
- dsfield = INET_ECN_encapsulate(0, dsfield);
- ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1208,7 +1232,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
}
@@ -1237,6 +1261,20 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
return 0;
}
+/*
+ * Apply new parameters @p to an existing tunnel @t.
+ *
+ * The tunnel is unlinked from the per-net tables, synchronize_net() is
+ * waited for, the parameters are changed, and the tunnel is linked back in
+ * before a netdev state-change notification is sent.
+ *
+ * Returns the result of ip6_tnl_change().
+ */
+static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+	struct ip6_tnl_net *ip6n = net_generic(dev_net(t->dev), ip6_tnl_net_id);
+	int rc;
+
+	ip6_tnl_unlink(ip6n, t);
+	synchronize_net();
+	rc = ip6_tnl_change(t, p);
+	ip6_tnl_link(ip6n, t);
+	netdev_state_change(t->dev);
+
+	return rc;
+}
+
+
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
@@ -1325,7 +1363,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
@@ -1345,11 +1383,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6_tnl_unlink(ip6n, t);
- synchronize_net();
- err = ip6_tnl_change(t, &p1);
- ip6_tnl_link(ip6n, t);
- netdev_state_change(dev);
+ err = ip6_tnl_update(t, &p1);
}
if (t) {
err = 0;
@@ -1362,7 +1396,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
if (dev == ip6n->fb_tnl_dev) {
@@ -1505,6 +1539,164 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
return 0;
}
+/*
+ * rtnetlink validate hook: check the optional IFLA_IPTUN_PROTO attribute.
+ *
+ * Accepted values are IPPROTO_IPV6, IPPROTO_IPIP and 0. The attribute is
+ * optional (ip6_tnl_netlink_parms() only reads it when present), so a
+ * request without it is valid and must not reach nla_get_u8(), which would
+ * dereference a NULL attribute pointer.
+ *
+ * Returns 0 if the attribute is absent or acceptable, -EINVAL otherwise.
+ */
+static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	u8 proto;
+
+	if (!data || !data[IFLA_IPTUN_PROTO])
+		return 0;
+
+	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+	if (proto != IPPROTO_IPV6 &&
+	    proto != IPPROTO_IPIP &&
+	    proto != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+
+/*
+ * Translate IFLA_IPTUN_* netlink attributes into a __ip6_tnl_parm.
+ *
+ * @parms is zeroed first, so every attribute missing from @data leaves its
+ * field at 0; a NULL @data yields an all-zero parameter block. LOCAL maps to
+ * laddr and REMOTE to raddr.
+ */
+static void ip6_tnl_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
+
+ if (data[IFLA_IPTUN_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
+
+ if (data[IFLA_IPTUN_FLOWINFO])
+ parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+}
+
+/*
+ * rtnetlink newlink hook: fill the new device's tunnel parameters from the
+ * netlink attributes and finish creation through ip6_tnl_create2().
+ *
+ * Returns -EEXIST when ip6_tnl_locate() already finds a tunnel for these
+ * parameters, otherwise the result of ip6_tnl_create2().
+ */
+static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
+			   struct nlattr *tb[], struct nlattr *data[])
+{
+	struct ip6_tnl *tnl = netdev_priv(dev);
+
+	ip6_tnl_netlink_parms(data, &tnl->parms);
+
+	/* lookup only: the third argument (0) is the same as in the original */
+	if (ip6_tnl_locate(dev_net(dev), &tnl->parms, 0))
+		return -EEXIST;
+
+	return ip6_tnl_create2(dev);
+}
+
+
+/*
+ * rtnetlink changelink hook: apply new parameters to an existing tunnel.
+ *
+ * Returns -EINVAL for the fallback tunnel device, -EEXIST when the
+ * requested parameters already belong to a different device, otherwise the
+ * result of ip6_tnl_update().
+ */
+static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
+			      struct nlattr *data[])
+{
+	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	struct __ip6_tnl_parm parms;
+	struct ip6_tnl *target;
+
+	if (dev == ip6n->fb_tnl_dev)
+		return -EINVAL;
+
+	ip6_tnl_netlink_parms(data, &parms);
+
+	target = ip6_tnl_locate(net, &parms, 0);
+	if (!target)
+		target = netdev_priv(dev);
+	else if (target->dev != dev)
+		return -EEXIST;
+
+	return ip6_tnl_update(target, &parms);
+}
+
+
+/*
+ * rtnetlink get_size hook: sum of nla_total_size() over the eight
+ * IFLA_IPTUN_* attributes that ip6_tnl_fill_info() emits.
+ */
+static size_t ip6_tnl_get_size(const struct net_device *dev)
+{
+	const int addr_attr = nla_total_size(sizeof(struct in6_addr));
+	const int u32_attr = nla_total_size(4);
+	const int u8_attr = nla_total_size(1);
+
+	return u32_attr +	/* IFLA_IPTUN_LINK */
+	       addr_attr +	/* IFLA_IPTUN_LOCAL */
+	       addr_attr +	/* IFLA_IPTUN_REMOTE */
+	       u8_attr +	/* IFLA_IPTUN_TTL */
+	       u8_attr +	/* IFLA_IPTUN_ENCAP_LIMIT */
+	       u32_attr +	/* IFLA_IPTUN_FLOWINFO */
+	       u32_attr +	/* IFLA_IPTUN_FLAGS */
+	       u8_attr;		/* IFLA_IPTUN_PROTO */
+}
+
+
+/*
+ * rtnetlink fill_info hook: dump the tunnel parameters of @dev into @skb as
+ * IFLA_IPTUN_* attributes.
+ *
+ * The attribute/field pairing mirrors ip6_tnl_netlink_parms(): LOCAL carries
+ * parms.laddr and REMOTE carries parms.raddr, so what is dumped matches what
+ * was configured.
+ *
+ * Returns 0 on success, -EMSGSIZE if any attribute could not be added.
+ */
+static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+	    nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
+		    &parm->laddr) ||
+	    nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+		    &parm->raddr) ||
+	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
+	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
+	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
+	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
+	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+
+/*
+ * Netlink attribute policy for the IFLA_IPTUN_* attributes of ip6tnl links.
+ * The two address attributes are given a length of sizeof(struct in6_addr);
+ * the others are typed as u8 or u32.
+ */
+static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
+};
+
+/*
+ * rtnetlink link operations for the "ip6tnl" link kind. Registered in
+ * ip6_tunnel_init() and unregistered in ip6_tunnel_cleanup(); also attached
+ * to each created device by ip6_tnl_create2().
+ */
+static struct rtnl_link_ops ip6_link_ops __read_mostly = {
+ .kind = "ip6tnl",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ip6_tnl_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6_tnl_dev_setup,
+ .validate = ip6_tnl_validate,
+ .newlink = ip6_tnl_newlink,
+ .changelink = ip6_tnl_changelink,
+ .get_size = ip6_tnl_get_size,
+ .fill_info = ip6_tnl_fill_info,
+};
+
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
.handler = ip4ip6_rcv,
.err_handler = ip4ip6_err,
@@ -1613,9 +1805,14 @@ static int __init ip6_tunnel_init(void)
pr_err("%s: can't register ip6ip6\n", __func__);
goto out_ip6ip6;
}
+ err = rtnl_link_register(&ip6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
return 0;
+rtnl_link_failed:
+ xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
@@ -1630,6 +1827,7 @@ out_pernet:
static void __exit ip6_tunnel_cleanup(void)
{
+ rtnl_link_unregister(&ip6_link_ops);
if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
pr_info("%s: can't deregister ip4ip6\n", __func__);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f7c7c6319720..96bfb4e4b820 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -52,6 +52,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
+#include <linux/netconf.h>
struct mr6_table {
struct list_head list;
@@ -66,8 +67,8 @@ struct mr6_table {
struct mif_device vif6_table[MAXMIFS];
int maxvif;
atomic_t cache_resolve_queue_len;
- int mroute_do_assert;
- int mroute_do_pim;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
int mroute_reg_vif_num;
#endif
@@ -115,6 +116,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
struct mfc6_cache *c, struct rtmsg *rtm);
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+ int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
@@ -805,8 +808,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
dev_set_allmulti(dev, -1);
in6_dev = __in6_dev_get(dev);
- if (in6_dev)
+ if (in6_dev) {
in6_dev->cnf.mc_forwarding--;
+ inet6_netconf_notify_devconf(dev_net(dev),
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+ }
if (v->flags & MIFF_REGISTER)
unregister_netdevice_queue(dev, head);
@@ -865,6 +872,7 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_destroy_unres(mrt, c);
}
@@ -958,8 +966,12 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
}
in6_dev = __in6_dev_get(dev);
- if (in6_dev)
+ if (in6_dev) {
in6_dev->cnf.mc_forwarding++;
+ inet6_netconf_notify_devconf(dev_net(dev),
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+ }
/*
* Fill in the VIF structures
@@ -1005,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
return NULL;
}
+/*
+ * Look for a (*,*,oif) entry: a cache entry whose origin and group are both
+ * the unspecified address and whose ttls[mifi] is below 255.
+ * Returns the first such entry, or NULL if there is none.
+ */
+static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
+						      mifi_t mifi)
+{
+	struct list_head *bucket;
+	struct mfc6_cache *entry;
+
+	bucket = &mrt->mfc6_cache_array[MFC6_HASH(&in6addr_any, &in6addr_any)];
+	list_for_each_entry(entry, bucket, list) {
+		if (!ipv6_addr_any(&entry->mf6c_origin))
+			continue;
+		if (!ipv6_addr_any(&entry->mf6c_mcastgrp))
+			continue;
+		if (entry->mfc_un.res.ttls[mifi] < 255)
+			return entry;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Look for a (*,G) entry for @mcastgrp usable from interface @mifi.
+ *
+ * A (*,G) entry (unspecified origin, matching group) is returned when its
+ * own ttls[mifi] is below 255, or when the (*,*) entry found through the
+ * entry's parent interface has ttls[mifi] below 255. If @mcastgrp is the
+ * unspecified address, or no (*,G) entry qualifies, the result is whatever
+ * ip6mr_cache_find_any_parent() returns for @mifi (possibly NULL).
+ */
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+ struct in6_addr *mcastgrp,
+ mifi_t mifi)
+{
+ int line = MFC6_HASH(mcastgrp, &in6addr_any);
+ struct mfc6_cache *c, *proxy;
+
+ /* no group given: only a (*,*) entry can match */
+ if (ipv6_addr_any(mcastgrp))
+ goto skip;
+
+ list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
+ if (c->mfc_un.res.ttls[mifi] < 255)
+ return c;
+
+ /* It's ok if the mifi is part of the static tree */
+ proxy = ip6mr_cache_find_any_parent(mrt,
+ c->mf6c_parent);
+ if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
+ return c;
+ }
+
+skip:
+ /* fall back to a (*,*) entry for the incoming interface */
+ return ip6mr_cache_find_any_parent(mrt, mifi);
+}
+
/*
* Allocate a multicast cache entry
*/
@@ -1211,6 +1267,7 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
atomic_inc(&mrt->cache_resolve_queue_len);
list_add(&c->list, &mrt->mfc6_unres_queue);
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
@@ -1234,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+ int parent)
{
int line;
struct mfc6_cache *c, *next;
@@ -1243,11 +1301,14 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ ipv6_addr_equal(&c->mf6c_mcastgrp,
+ &mfc->mf6cc_mcastgrp.sin6_addr) &&
+ (parent == -1 || parent == c->mf6c_parent)) {
write_lock_bh(&mrt_lock);
list_del(&c->list);
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_cache_free(c);
return 0;
}
@@ -1298,9 +1359,9 @@ static int __net_init ip6mr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
- if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
+ if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
goto proc_vif_fail;
- if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
+ if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
goto proc_cache_fail;
#endif
@@ -1308,7 +1369,7 @@ static int __net_init ip6mr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
proc_cache_fail:
- proc_net_remove(net, "ip6_mr_vif");
+ remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
@@ -1319,8 +1380,8 @@ fail:
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_remove(net, "ip6_mr_cache");
- proc_net_remove(net, "ip6_mr_vif");
+ remove_proc_entry("ip6_mr_cache", net->proc_net);
+ remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
}
@@ -1377,7 +1438,7 @@ void ip6_mr_cleanup(void)
}
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
- struct mf6cctl *mfc, int mrtsock)
+ struct mf6cctl *mfc, int mrtsock, int parent)
{
bool found = false;
int line;
@@ -1399,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ ipv6_addr_equal(&c->mf6c_mcastgrp,
+ &mfc->mf6cc_mcastgrp.sin6_addr) &&
+ (parent == -1 || parent == mfc->mf6cc_parent)) {
found = true;
break;
}
@@ -1412,10 +1475,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
- if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+ if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
+ !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
return -EINVAL;
c = ip6mr_cache_alloc();
@@ -1456,6 +1521,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1489,6 +1555,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
list_del(&c->list);
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_cache_free(c);
}
}
@@ -1497,6 +1564,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
list_del(&c->list);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_destroy_unres(mrt, c);
}
spin_unlock_bh(&mfc_unres_lock);
@@ -1513,6 +1581,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
}
else
err = -EADDRINUSE;
@@ -1535,6 +1606,10 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ inet6_netconf_notify_devconf(net,
+ NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt);
@@ -1571,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
- int ret;
+ int ret, parent = 0;
struct mif6ctl vif;
struct mf6cctl mfc;
mifi_t mifi;
@@ -1583,7 +1658,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
+ if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1628,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
*/
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
+ parent = -1;
+ case MRT6_ADD_MFC_PROXY:
+ case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
return -EINVAL;
if (copy_from_user(&mfc, optval, sizeof(mfc)))
return -EFAULT;
+ if (parent == 0)
+ parent = mfc.mf6cc_parent;
rtnl_lock();
- if (optname == MRT6_DEL_MFC)
- ret = ip6mr_mfc_delete(mrt, &mfc);
+ if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
+ ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
- ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
+ ret = ip6mr_mfc_add(net, mrt, &mfc,
+ sk == mrt->mroute6_sk, parent);
rtnl_unlock();
return ret;
@@ -1646,9 +1727,12 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ASSERT:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
- mrt->mroute_do_assert = !!v;
+ mrt->mroute_do_assert = v;
return 0;
}
@@ -1656,6 +1740,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_PIM:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
v = !!v;
@@ -1679,6 +1766,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -EINVAL;
if (get_user(v, (u32 __user *)optval))
return -EFAULT;
+ /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
+ if (v != RT_TABLE_DEFAULT && v >= 100000000)
+ return -EINVAL;
if (sk == mrt->mroute6_sk)
return -EBUSY;
@@ -1984,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
{
int psend = -1;
int vif, ct;
+ int true_vifi = ip6mr_find_vif(mrt, skb->dev);
vif = cache->mf6c_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ struct mfc6_cache *cache_proxy;
+
+ /* For an (*,G) entry, we only check that the incoming
+ * interface is part of the static tree.
+ */
+ cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ if (cache_proxy &&
+ cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ goto forward;
+ }
+
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (mrt->vif6_table[vif].dev != skb->dev) {
- int true_vifi;
-
cache->mfc_un.res.wrong_if++;
- true_vifi = ip6mr_find_vif(mrt, skb->dev);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2014,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
goto dont_forward;
}
+forward:
mrt->vif6_table[vif].pkt_in++;
mrt->vif6_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
+ if (ipv6_addr_any(&cache->mf6c_origin) &&
+ ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (true_vifi >= 0 &&
+ true_vifi != cache->mf6c_parent &&
+ ipv6_hdr(skb)->hop_limit >
+ cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ /* It's an (*,*) entry and the packet is not coming from
+ * the upstream: forward the packet to the upstream
+ * only.
+ */
+ psend = cache->mf6c_parent;
+ goto last_forward;
+ }
+ goto dont_forward;
+ }
for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
- if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ /* For (*,G) entry, don't forward to the incoming interface */
+ if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -2030,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
psend = ct;
}
}
+last_forward:
if (psend != -1) {
ip6mr_forward2(net, mrt, skb, cache, psend);
return 0;
@@ -2065,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb)
read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+ if (cache == NULL) {
+ int vif = ip6mr_find_vif(mrt, skb->dev);
+
+ if (vif >= 0)
+ cache = ip6mr_cache_find_any(mrt,
+ &ipv6_hdr(skb)->daddr,
+ vif);
+ }
/*
* No usable cache entry
@@ -2097,8 +2224,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
{
int ct;
struct rtnexthop *nhp;
- u8 *b = skb_tail_pointer(skb);
- struct rtattr *mp_head;
+ struct nlattr *mp_attr;
+ struct rta_mfc_stats mfcs;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mf6c_parent >= MAXMIFS)
@@ -2107,28 +2234,35 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (MIF_EXISTS(mrt, c->mf6c_parent) &&
nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
return -EMSGSIZE;
-
- mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (mp_attr == NULL)
+ return -EMSGSIZE;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
- goto rtattr_failure;
- nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (nhp == NULL) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
- mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
+
+ nla_nest_end(skb, mp_attr);
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
+ return -EMSGSIZE;
+
rtm->rtm_type = RTN_MULTICAST;
return 1;
-
-rtattr_failure:
- nlmsg_trim(skb, b);
- return -EMSGSIZE;
}
int ip6mr_get_route(struct net *net,
@@ -2145,6 +2279,13 @@ int ip6mr_get_route(struct net *net,
read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+ if (!cache && skb->dev) {
+ int vif = ip6mr_find_vif(mrt, skb->dev);
+
+ if (vif >= 0)
+ cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
+ vif);
+ }
if (!cache) {
struct sk_buff *skb2;
@@ -2202,31 +2343,38 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc6_cache *c)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
+ int err;
- nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
if (nlh == NULL)
return -EMSGSIZE;
rtm = nlmsg_data(nlh);
- rtm->rtm_family = RTNL_FAMILY_IPMR;
+ rtm->rtm_family = RTNL_FAMILY_IP6MR;
rtm->rtm_dst_len = 128;
rtm->rtm_src_len = 128;
rtm->rtm_tos = 0;
rtm->rtm_table = mrt->id;
if (nla_put_u32(skb, RTA_TABLE, mrt->id))
goto nla_put_failure;
+ rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- rtm->rtm_protocol = RTPROT_UNSPEC;
+ if (c->mfc_flags & MFC_STATIC)
+ rtm->rtm_protocol = RTPROT_STATIC;
+ else
+ rtm->rtm_protocol = RTPROT_MROUTED;
rtm->rtm_flags = 0;
if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
goto nla_put_failure;
- if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
+ err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ /* do not break the dump if cache is unresolved */
+ if (err < 0 && err != -ENOENT)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2236,6 +2384,52 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int mr6_msgsize(bool unresolved, int maxvif)
+{
+ size_t len =
+ NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
+ + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
+ ;
+
+ if (!unresolved)
+ len = len
+ + nla_total_size(4) /* RTA_IIF */
+ + nla_total_size(0) /* RTA_MULTIPATH */
+ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
+ /* RTA_MFC_STATS */
+ + nla_total_size(sizeof(struct rta_mfc_stats))
+ ;
+
+ return len;
+}
+
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+ int cmd)
+{
+ struct net *net = read_pnet(&mrt->net);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ if (err < 0)
+ goto errout;
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
+ return;
+
+errout:
+ kfree_skb(skb);
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+}
+
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -2262,13 +2456,29 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc) < 0)
+ mfc, RTM_NEWROUTE) < 0)
goto done;
next_entry:
e++;
}
e = s_e = 0;
}
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (ip6mr_fill_mroute(mrt, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ mfc, RTM_NEWROUTE) < 0) {
+ spin_unlock_bh(&mfc_unres_lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(&mfc_unres_lock);
+ e = s_e = 0;
s_h = 0;
next_table:
t++;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ba6d13d1f1e1..d1e2e8ef29c5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,7 +343,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_NET_RAW)) {
retv = -EPERM;
break;
}
@@ -381,7 +382,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* hop-by-hop / destination options are privileged option */
retv = -EPERM;
- if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
opt = ipv6_renew_options(sk, np->opt, optname,
@@ -397,7 +398,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_RTHDR && opt && opt->srcrt) {
struct ipv6_rt_hdr *rthdr = opt->srcrt;
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1)
@@ -475,8 +476,8 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void*)(opt+1);
- retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
- &junk);
+ retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
+ &junk, &junk);
if (retv)
goto done;
update:
@@ -754,7 +755,7 @@ done:
case IPV6_IPSEC_POLICY:
case IPV6_XFRM_POLICY:
retv = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
@@ -827,6 +828,7 @@ pref_skip_coa:
if (val < 0 || val > 255)
goto e_inval;
np->min_hopcount = val;
+ retv = 0;
break;
case IPV6_DONTFRAG:
np->dontfrag = valbool;
@@ -1000,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
release_sock(sk);
if (skb) {
- int err = datagram_recv_ctl(sk, &msg, skb);
+ int err = ip6_datagram_recv_ctl(sk, &msg, skb);
kfree_skb(skb);
if (err)
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 92f8e48e4ba4..bfa6cc36ef2a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
@@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
@@ -284,6 +284,9 @@ void ipv6_sock_mc_close(struct sock *sk)
struct ipv6_mc_socklist *mc_lst;
struct net *net = sock_net(sk);
+ if (!rcu_access_pointer(np->ipv6_mc_list))
+ return;
+
spin_lock(&ipv6_sk_mc_lock);
while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
@@ -373,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
goto done; /* err = -EADDRNOTAVAIL */
rv = !0;
for (i=0; i<psl->sl_count; i++) {
- rv = memcmp(&psl->sl_addr[i], source,
- sizeof(struct in6_addr));
+ rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
if (rv == 0)
break;
}
@@ -424,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i=0; i<psl->sl_count; i++) {
- rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
- if (rv == 0)
- break;
+ rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
+ if (rv == 0) /* address already present: that is an error */
+ goto done;
}
- if (rv == 0) /* address already there is an error */
- goto done;
for (j=psl->sl_count-1; j>=i; j--)
psl->sl_addr[j+1] = psl->sl_addr[j];
psl->sl_addr[i] = *source;
@@ -661,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
spin_lock_bh(&mc->mca_lock);
if (!(mc->mca_flags&MAF_LOADED)) {
mc->mca_flags |= MAF_LOADED;
@@ -687,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
spin_lock_bh(&mc->mca_lock);
if (mc->mca_flags&MAF_LOADED) {
mc->mca_flags &= ~MAF_LOADED;
@@ -932,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
}
/*
- * identify MLD packets for MLD filter exceptions
- */
-bool ipv6_is_mld(struct sk_buff *skb, int nexthdr)
-{
- struct icmp6hdr *pic;
-
- if (nexthdr != IPPROTO_ICMPV6)
- return false;
-
- if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
- return false;
-
- pic = icmp6_hdr(skb);
-
- switch (pic->icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- return true;
- default:
- break;
- }
- return false;
-}
-
-/*
* check if the interface/address pair is valid
*/
bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
@@ -1337,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int proto, int len)
+{
+ struct ipv6hdr *hdr;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
+ skb_reset_network_header(skb);
+ skb_put(skb, sizeof(struct ipv6hdr));
+ hdr = ipv6_hdr(skb);
+
+ ip6_flow_hdr(hdr, 0, 0);
+
+ hdr->payload_len = htons(len);
+ hdr->nexthdr = proto;
+ hdr->hop_limit = inet6_sk(sk)->hop_limit;
+
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
+}
+
static struct sk_buff *mld_newpack(struct net_device *dev, int size)
{
struct net *net = dev_net(dev);
@@ -1372,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
} else
saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
@@ -1415,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb)
icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
- dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
err = 0;
if (IS_ERR(dst)) {
@@ -1764,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
} else
saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
+ ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
@@ -1783,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
- dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_out;
@@ -2593,10 +2599,10 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO,
- &igmp6_mcf_seq_fops))
+ if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ &igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
err = 0;
@@ -2604,14 +2610,14 @@ out:
return err;
out_proc_net_igmp6:
- proc_net_remove(net, "igmp6");
+ remove_proc_entry("igmp6", net->proc_net);
goto out;
}
static void __net_exit igmp6_proc_exit(struct net *net)
{
- proc_net_remove(net, "mcfilter6");
- proc_net_remove(net, "igmp6");
+ remove_proc_entry("mcfilter6", net->proc_net);
+ remove_proc_entry("igmp6", net->proc_net);
}
#else
static inline int igmp6_proc_init(struct net *net)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ff36194a71aa..76ef4353d518 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -143,16 +143,12 @@ struct neigh_table nd_tbl = {
.gc_thresh3 = 1024,
};
-static inline int ndisc_opt_addr_space(struct net_device *dev)
+static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
{
- return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
-}
-
-static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
- unsigned short addr_type)
-{
- int space = NDISC_OPT_SPACE(data_len);
- int pad = ndisc_addr_option_pad(addr_type);
+ int pad = ndisc_addr_option_pad(skb->dev->type);
+ int data_len = skb->dev->addr_len;
+ int space = ndisc_opt_addr_space(skb->dev);
+ u8 *opt = skb_put(skb, space);
opt[0] = type;
opt[1] = space>>3;
@@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
opt += data_len;
if ((space -= data_len) > 0)
memset(opt, 0, space);
- return opt + space;
}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
@@ -370,94 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n)
ipv6_dev_mc_dec(dev, &maddr);
}
-struct sk_buff *ndisc_build_skb(struct net_device *dev,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h,
- const struct in6_addr *target,
- int llinfo)
+static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+ int len)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.ndisc_sk;
- struct sk_buff *skb;
- struct icmp6hdr *hdr;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- int len;
+ struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
+ struct sk_buff *skb;
int err;
- u8 *opt;
-
- if (!dev->addr_len)
- llinfo = 0;
-
- len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
- if (llinfo)
- len += ndisc_opt_addr_space(dev);
skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + hlen + tlen),
+ hlen + sizeof(struct ipv6hdr) + len + tlen,
1, &err);
if (!skb) {
- ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n",
+ ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n",
__func__, err);
return NULL;
}
- skb_reserve(skb, hlen);
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
- skb->transport_header = skb->tail;
- skb_put(skb, len);
+ skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
+ skb_reset_transport_header(skb);
- hdr = (struct icmp6hdr *)skb_transport_header(skb);
- memcpy(hdr, icmp6h, sizeof(*hdr));
+ return skb;
+}
- opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
- if (target) {
- *(struct in6_addr *)opt = *target;
- opt += sizeof(*target);
- }
+static void ip6_nd_hdr(struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int hop_limit, int len)
+{
+ struct ipv6hdr *hdr;
- if (llinfo)
- ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
- dev->addr_len, dev->type);
+ skb_push(skb, sizeof(*hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
- hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial(hdr,
- len, 0));
+ ip6_flow_hdr(hdr, 0, 0);
- return skb;
-}
+ hdr->payload_len = htons(len);
+ hdr->nexthdr = IPPROTO_ICMPV6;
+ hdr->hop_limit = hop_limit;
-EXPORT_SYMBOL(ndisc_build_skb);
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
+}
-void ndisc_send_skb(struct sk_buff *skb,
- struct net_device *dev,
- struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h)
+static void ndisc_send_skb(struct sk_buff *skb,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
- struct flowi6 fl6;
- struct dst_entry *dst;
- struct net *net = dev_net(dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(skb->dev);
struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
int err;
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
- icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
- dst = icmp6_dst_alloc(dev, neigh, &fl6);
- if (IS_ERR(dst)) {
- kfree_skb(skb);
- return;
+ if (!dst) {
+ struct sock *sk = net->ipv6.ndisc_sk;
+ struct flowi6 fl6;
+
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
+ if (IS_ERR(dst)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_dst_set(skb, dst);
}
- skb_dst_set(skb, dst);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
+ IPPROTO_ICMPV6,
+ csum_partial(icmp6h,
+ skb->len, 0));
+
+ ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
@@ -473,38 +462,17 @@ void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
-EXPORT_SYMBOL(ndisc_send_skb);
-
-/*
- * Send a Neighbour Discover packet
- */
-static void __ndisc_send(struct net_device *dev,
- struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h, const struct in6_addr *target,
- int llinfo)
-{
- struct sk_buff *skb;
-
- skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
- if (!skb)
- return;
-
- ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
-}
-
static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
- int router, int solicited, int override, int inc_opt)
+ bool router, bool solicited, bool override, bool inc_opt)
{
+ struct sk_buff *skb;
struct in6_addr tmpaddr;
struct inet6_ifaddr *ifp;
const struct in6_addr *src_addr;
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
- };
+ struct nd_msg *msg;
+ int optlen = 0;
/* for anycast or proxy, solicited_addr != src_addr */
ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
@@ -522,20 +490,38 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
src_addr = &tmpaddr;
}
- icmp6h.icmp6_router = router;
- icmp6h.icmp6_solicited = solicited;
- icmp6h.icmp6_override = override;
+ if (!dev->addr_len)
+ inc_opt = 0;
+ if (inc_opt)
+ optlen += ndisc_opt_addr_space(dev);
- __ndisc_send(dev, neigh, daddr, src_addr,
- &icmp6h, solicited_addr,
- inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct nd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+ .icmp6_router = router,
+ .icmp6_solicited = solicited,
+ .icmp6_override = override,
+ },
+ .target = *solicited_addr,
+ };
+
+ if (inc_opt)
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+ dev->dev_addr);
+
+
+ ndisc_send_skb(skb, daddr, src_addr);
}
static void ndisc_send_unsol_na(struct net_device *dev)
{
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
- struct in6_addr mcaddr;
idev = in6_dev_get(dev);
if (!idev)
@@ -543,8 +529,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
- ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
+ ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
/*inc_opt=*/ true);
@@ -558,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr)
{
+ struct sk_buff *skb;
struct in6_addr addr_buf;
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
- };
+ int inc_opt = dev->addr_len;
+ int optlen = 0;
+ struct nd_msg *msg;
if (saddr == NULL) {
if (ipv6_get_lladdr(dev, &addr_buf,
@@ -570,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
saddr = &addr_buf;
}
- __ndisc_send(dev, neigh, daddr, saddr,
- &icmp6h, solicit,
- !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
+ if (ipv6_addr_any(saddr))
+ inc_opt = 0;
+ if (inc_opt)
+ optlen += ndisc_opt_addr_space(dev);
+
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct nd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+ },
+ .target = *solicit,
+ };
+
+ if (inc_opt)
+ ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+ dev->dev_addr);
+
+ ndisc_send_skb(skb, daddr, saddr);
}
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_ROUTER_SOLICITATION,
- };
+ struct sk_buff *skb;
+ struct rs_msg *msg;
int send_sllao = dev->addr_len;
+ int optlen = 0;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
@@ -605,9 +610,27 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
}
#endif
- __ndisc_send(dev, NULL, daddr, saddr,
- &icmp6h, NULL,
- send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
+ if (!dev->addr_len)
+ send_sllao = 0;
+ if (send_sllao)
+ optlen += ndisc_opt_addr_space(dev);
+
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct rs_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct rs_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_ROUTER_SOLICITATION,
+ },
+ };
+
+ if (send_sllao)
+ ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+ dev->dev_addr);
+
+ ndisc_send_skb(skb, daddr, saddr);
}
@@ -683,6 +706,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
bool inc;
int is_router = -1;
+ if (skb->len < sizeof(struct nd_msg)) {
+ ND_PRINTK(2, warn, "NS: packet too short\n");
+ return;
+ }
+
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK(2, warn, "NS: multicast target address\n");
return;
@@ -692,11 +720,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
* RFC2461 7.1.1:
* DAD has to be destined for solicited node multicast address.
*/
- if (dad &&
- !(daddr->s6_addr32[0] == htonl(0xff020000) &&
- daddr->s6_addr32[1] == htonl(0x00000000) &&
- daddr->s6_addr32[2] == htonl(0x00000001) &&
- daddr->s6_addr [12] == 0xff )) {
+ if (dad && !ipv6_addr_is_solict_mult(daddr)) {
ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
return;
}
@@ -787,11 +811,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
}
if (is_router < 0)
- is_router = !!idev->cnf.forwarding;
+ is_router = idev->cnf.forwarding;
if (dad) {
ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
- is_router, 0, (ifp != NULL), 1);
+ !!is_router, false, (ifp != NULL), true);
goto out;
}
@@ -812,8 +836,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NEIGH_UPDATE_F_OVERRIDE);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, neigh, saddr, &msg->target,
- is_router,
- 1, (ifp != NULL && inc), inc);
+ !!is_router,
+ true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
}
@@ -906,7 +930,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
- /* XXX: idev->cnf.prixy_ndp */
+ /* XXX: idev->cnf.proxy_ndp */
goto out;
}
@@ -1034,18 +1058,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
}
-static inline int accept_ra(struct inet6_dev *in6_dev)
-{
- /*
- * If forwarding is enabled, RA are not accepted unless the special
- * hybrid mode (accept_ra=2) is enabled.
- */
- if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
- return 0;
-
- return in6_dev->cnf.accept_ra;
-}
-
static void ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1093,7 +1105,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev))
goto skip_linkparms;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1145,7 +1157,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return;
}
}
@@ -1170,7 +1182,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return;
}
neigh->flags |= NTF_ROUTER;
@@ -1249,7 +1261,7 @@ skip_linkparms:
NEIGH_UPDATE_F_ISROUTER);
}
- if (!accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev))
goto out;
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -1326,14 +1338,19 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
if (neigh)
neigh_release(neigh);
}
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
+ u8 *hdr;
+ struct ndisc_options ndopts;
+ struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct rd_msg, opt));
+
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
@@ -1350,28 +1367,48 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
+ if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+ return;
+
+ if (!ndopts.nd_opts_rh)
+ return;
+
+ hdr = (u8 *)ndopts.nd_opts_rh;
+ hdr += 8;
+ if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
+ return;
+
icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
}
+static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
+ struct sk_buff *orig_skb,
+ int rd_len)
+{
+ u8 *opt = skb_put(skb, rd_len);
+
+ memset(opt, 0, 8);
+ *(opt++) = ND_OPT_REDIRECT_HDR;
+ *(opt++) = (rd_len >> 3);
+ opt += 6;
+
+ memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+}
+
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.ndisc_sk;
- int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ int optlen = 0;
struct inet_peer *peer;
struct sk_buff *buff;
- struct icmp6hdr *icmph;
+ struct rd_msg *msg;
struct in6_addr saddr_buf;
- struct in6_addr *addrp;
struct rt6_info *rt;
struct dst_entry *dst;
- struct inet6_dev *idev;
struct flowi6 fl6;
- u8 *opt;
- int hlen, tlen;
int rd_len;
- int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
bool ret;
@@ -1427,7 +1464,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- len += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev);
} else
read_unlock_bh(&neigh->lock);
@@ -1435,80 +1472,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
rd_len = min_t(unsigned int,
- IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+ IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
+ skb->len + 8);
rd_len &= ~0x7;
- len += rd_len;
-
- hlen = LL_RESERVED_SPACE(dev);
- tlen = dev->needed_tailroom;
- buff = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + hlen + tlen),
- 1, &err);
- if (buff == NULL) {
- ND_PRINTK(0, err,
- "Redirect: %s failed to allocate an skb, err=%d\n",
- __func__, err);
- goto release;
- }
-
- skb_reserve(buff, hlen);
- ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
- IPPROTO_ICMPV6, len);
-
- skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
- skb_put(buff, len);
- icmph = icmp6_hdr(buff);
-
- memset(icmph, 0, sizeof(struct icmp6hdr));
- icmph->icmp6_type = NDISC_REDIRECT;
+ optlen += rd_len;
- /*
- * copy target and destination addresses
- */
-
- addrp = (struct in6_addr *)(icmph + 1);
- *addrp = *target;
- addrp++;
- *addrp = ipv6_hdr(skb)->daddr;
+ buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!buff)
+ goto release;
- opt = (u8*) (addrp + 1);
+ msg = (struct rd_msg *)skb_put(buff, sizeof(*msg));
+ *msg = (struct rd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_REDIRECT,
+ },
+ .target = *target,
+ .dest = ipv6_hdr(skb)->daddr,
+ };
/*
* include target_address option
*/
if (ha)
- opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
- dev->addr_len, dev->type);
+ ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
/*
* build redirect option and copy skb over to the new packet.
*/
- memset(opt, 0, 8);
- *(opt++) = ND_OPT_REDIRECT_HDR;
- *(opt++) = (rd_len >> 3);
- opt += 6;
-
- memcpy(opt, ipv6_hdr(skb), rd_len - 8);
-
- icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
- len, IPPROTO_ICMPV6,
- csum_partial(icmph, len, 0));
+ if (rd_len)
+ ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
skb_dst_set(buff, dst);
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
- dst_output);
- if (!err) {
- ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
- ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- }
-
- rcu_read_unlock();
+ ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
return;
release:
@@ -1525,7 +1522,7 @@ int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
- if (!pskb_may_pull(skb, skb->len))
+ if (skb_linearize(skb))
return 0;
msg = (struct nd_msg *)skb_transport_header(skb);
@@ -1575,11 +1572,18 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = ptr;
struct net *net = dev_net(dev);
+ struct inet6_dev *idev;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(~0UL, net);
+ idev = in6_dev_get(dev);
+ if (!idev)
+ break;
+ if (idev->cnf.ndisc_notify)
+ ndisc_send_unsol_na(dev);
+ in6_dev_put(idev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d7cb04506c3d..341b54ade72c 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e)
return ip6t_get_target((struct ip6t_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* This cries for unification! */
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
@@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb,
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
@@ -1100,7 +1098,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1159,7 +1157,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
}
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
@@ -1199,7 +1197,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1357,7 +1355,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
t = xt_find_table_lock(net, AF_INET6, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
@@ -1856,7 +1854,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1941,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
@@ -1971,7 +1969,7 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1993,7 +1991,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2018,7 +2016,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2273,112 +2271,9 @@ static void __exit ip6_tables_fini(void)
unregister_pernet_subsys(&ip6_tables_net_ops);
}
-/*
- * find the offset to specified header or the protocol number of last header
- * if target < 0. "last header" is transport protocol header, ESP, or
- * "No next header".
- *
- * Note that *offset is used as input/output parameter. an if it is not zero,
- * then it must be a valid offset to an inner IPv6 header. This can be used
- * to explore inner IPv6 header, eg. ICMPv6 error messages.
- *
- * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
- *
- * If the first fragment doesn't contain the final protocol header or
- * NEXTHDR_NONE it is considered invalid.
- *
- * Note that non-1st fragment is special case that "the protocol number
- * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
- *
- * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG
- * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and
- * target < 0, then this function will stop at the AH header.
- */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff, int *flags)
-{
- unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
-
- if (fragoff)
- *fragoff = 0;
-
- if (*offset) {
- struct ipv6hdr _ip6, *ip6;
-
- ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
- if (!ip6 || (ip6->version != 6)) {
- printk(KERN_ERR "IPv6 header not found\n");
- return -EBADMSG;
- }
- start = *offset + sizeof(struct ipv6hdr);
- nexthdr = ip6->nexthdr;
- }
- len = skb->len - start;
-
- while (nexthdr != target) {
- struct ipv6_opt_hdr _hdr, *hp;
- unsigned int hdrlen;
-
- if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
- break;
- return -ENOENT;
- }
-
- hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return -EBADMSG;
- if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off;
- __be16 *fp;
-
- if (flags) /* Indicate that this is a fragment */
- *flags |= IP6T_FH_F_FRAG;
- fp = skb_header_pointer(skb,
- start+offsetof(struct frag_hdr,
- frag_off),
- sizeof(_frag_off),
- &_frag_off);
- if (fp == NULL)
- return -EBADMSG;
-
- _frag_off = ntohs(*fp) & ~0x7;
- if (_frag_off) {
- if (target < 0 &&
- ((!ipv6_ext_hdr(hp->nexthdr)) ||
- hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
- *fragoff = _frag_off;
- return hp->nexthdr;
- }
- return -ENOENT;
- }
- hdrlen = 8;
- } else if (nexthdr == NEXTHDR_AUTH) {
- if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0))
- break;
- hdrlen = (hp->hdrlen + 2) << 2;
- } else
- hdrlen = ipv6_optlen(hp);
-
- nexthdr = hp->nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *offset = start;
- return nexthdr;
-}
-
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ipv6_find_hdr);
module_init(ip6_tables_init);
module_exit(ip6_tables_fini);
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index e9486915eff6..83acc1405a18 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -9,47 +9,38 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
+#include <net/ipv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6t_NPT.h>
#include <linux/netfilter/x_tables.h>
-static __sum16 csum16_complement(__sum16 a)
-{
- return (__force __sum16)(0xffff - (__force u16)a);
-}
-
-static __sum16 csum16_add(__sum16 a, __sum16 b)
-{
- u16 sum;
-
- sum = (__force u16)a + (__force u16)b;
- sum += (__force u16)a < (__force u16)b;
- return (__force __sum16)sum;
-}
-
-static __sum16 csum16_sub(__sum16 a, __sum16 b)
-{
- return csum16_add(a, csum16_complement(b));
-}
-
static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
{
struct ip6t_npt_tginfo *npt = par->targinfo;
- __sum16 src_sum = 0, dst_sum = 0;
+ __wsum src_sum = 0, dst_sum = 0;
+ struct in6_addr pfx;
unsigned int i;
if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
return -EINVAL;
+ /* Ensure that LSB of prefix is zero */
+ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
+ if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
+ return -EINVAL;
+ ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
+ if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
- src_sum = csum16_add(src_sum,
- (__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
- dst_sum = csum16_add(dst_sum,
- (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
+ src_sum = csum_add(src_sum,
+ (__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
+ dst_sum = csum_add(dst_sum,
+ (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
}
- npt->adjustment = csum16_sub(src_sum, dst_sum);
+ npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
return 0;
}
@@ -70,7 +61,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
idx = i / 32;
addr->s6_addr32[idx] &= mask;
- addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
+ addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
}
if (pfx_len <= 48)
@@ -85,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
return false;
}
- sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
- npt->adjustment);
+ sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
+ csum_unfold(npt->adjustment)));
if (sum == CSUM_MANGLED_0)
sum = 0;
*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index fd4fb34c51c7..ed3b427b2841 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -126,12 +126,13 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
skb_put(nskb, sizeof(struct ipv6hdr));
skb_reset_network_header(nskb);
ip6h = ipv6_hdr(nskb);
- *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20));
+ ip6_flow_hdr(ip6h, tclass, 0);
ip6h->hop_limit = ip6_dst_hoplimit(dst);
ip6h->nexthdr = IPPROTO_TCP;
ip6h->saddr = oip6h->daddr;
ip6h->daddr = oip6h->saddr;
+ skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 5d1d8b04d694..5060d54199ab 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 7431121b87de..6134a1ebfb1b 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
+#include <net/ipv6.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
dev_net(out)->ipv6.ip6table_mangle);
if (ret != NF_DROP && ret != NF_STOLEN &&
- (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
- memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
+ !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index d57dab17a182..e0e788d25b14 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -127,19 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum,
ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
- } else
+ } else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
+ if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
break;
default:
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ goto oif_changed;
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
}
static unsigned int
@@ -277,9 +286,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_nat))
- return PTR_ERR(net->ipv6.ip6table_nat);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_nat);
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 8860d23e61cf..2b6c226f5198 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -21,6 +21,7 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -80,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
}
protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
- * (protoff == skb->len) mean that the packet doesn't have no data
- * except of IPv6 & ext headers. but it's tracked anyway. - YK
+ * (protoff == skb->len) means the packet has no data, just
+ * IPv6 and possibly extension headers, but it is tracked anyway
*/
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
@@ -103,7 +104,6 @@ static unsigned int ipv6_helper(unsigned int hooknum,
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
- unsigned int ret;
__be16 frag_off;
int protoff;
u8 nexthdr;
@@ -129,12 +129,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
return NF_ACCEPT;
}
- ret = helper->help(skb, protoff, ct, ctinfo);
- if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
- nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
- "nf_ct_%s: dropping packet", helper->name);
- }
- return ret;
+ return helper->help(skb, protoff, ct, ctinfo);
}
static unsigned int ipv6_confirm(unsigned int hooknum,
@@ -295,7 +290,56 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
},
};
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct sockaddr_in6 sin6;
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+ struct nf_conn *ct;
+
+ tuple.src.u3.in6 = inet6->rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.in6 = inet6->daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.dst.protonum = sk->sk_protocol;
+
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+ return -ENOPROTOOPT;
+
+ if (*len < 0 || (unsigned int) *len < sizeof(sin6))
+ return -EINVAL;
+
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ if (!h) {
+ pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+ &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+ }
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+ memcpy(&sin6.sin6_addr,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+ sizeof(sin6.sin6_addr));
+
+ nf_ct_put(ct);
+
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = sk->sk_bound_dev_if;
+ else
+ sin6.sin6_scope_id = 0;
+
+ return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -346,7 +390,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.invert_tuple = ipv6_invert_tuple,
.print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
@@ -359,58 +403,55 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+static struct nf_sockopt_ops so_getorigdst6 = {
+ .pf = NFPROTO_IPV6,
+ .get_optmin = IP6T_SO_ORIGINAL_DST,
+ .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
+ .get = ipv6_getorigdst,
+ .owner = THIS_MODULE,
+};
+
static int ipv6_net_init(struct net *net)
{
int ret = 0;
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_tcp6);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n");
+ pr_err("nf_conntrack_tcp6: pernet registration failed\n");
goto out;
}
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_udp6);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n");
+ pr_err("nf_conntrack_udp6: pernet registration failed\n");
goto cleanup_tcp6;
}
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_icmpv6);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
+ pr_err("nf_conntrack_icmp6: pernet registration failed\n");
goto cleanup_udp6;
}
- ret = nf_conntrack_l3proto_register(net,
- &nf_conntrack_l3proto_ipv6);
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
+ pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
goto cleanup_icmpv6;
}
return 0;
cleanup_icmpv6:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_icmpv6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
cleanup_udp6:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_udp6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
cleanup_tcp6:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_tcp6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
out:
return ret;
}
static void ipv6_net_exit(struct net *net)
{
- nf_conntrack_l3proto_unregister(net,
- &nf_conntrack_l3proto_ipv6);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_icmpv6);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_udp6);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_tcp6);
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
}
static struct pernet_operations ipv6_net_ops = {
@@ -425,29 +466,74 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
nf_defrag_ipv6_enable();
+ ret = nf_register_sockopt(&so_getorigdst6);
+ if (ret < 0) {
+ pr_err("Unable to register netfilter socket option\n");
+ return ret;
+ }
+
ret = register_pernet_subsys(&ipv6_net_ops);
if (ret < 0)
- goto cleanup_pernet;
+ goto cleanup_sockopt;
+
ret = nf_register_hooks(ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
if (ret < 0) {
pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
"hook.\n");
- goto cleanup_ipv6;
+ goto cleanup_pernet;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+ goto cleanup_hooks;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
+ goto cleanup_tcp6;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
+ goto cleanup_udp6;
+ }
+
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
+ goto cleanup_icmpv6;
}
return ret;
- cleanup_ipv6:
- unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_icmpv6:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ cleanup_hooks:
+ nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
cleanup_pernet:
+ unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst6);
return ret;
}
static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
unregister_pernet_subsys(&ipv6_net_ops);
+ nf_unregister_sockopt(&so_getorigdst6);
}
module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 2d54b2061d68..24df3dde0076 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
.error = icmpv6_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 22c8ea951185..54087e96d7b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -97,9 +97,9 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
if (table == NULL)
goto err_alloc;
- table[0].data = &net->ipv6.frags.high_thresh;
- table[1].data = &net->ipv6.frags.low_thresh;
- table[2].data = &net->ipv6.frags.timeout;
+ table[0].data = &net->nf_frag.frags.timeout;
+ table[1].data = &net->nf_frag.frags.low_thresh;
+ table[2].data = &net->nf_frag.frags.high_thresh;
}
hdr = register_net_sysctl(net, "net/netfilter", table);
@@ -311,12 +311,15 @@ found:
else
fq->q.fragments = skb;
- skb->dev = NULL;
+ if (skb->dev) {
+ fq->iif = skb->dev->ifindex;
+ skb->dev = NULL;
+ }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- atomic_add(skb->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -325,9 +328,8 @@ found:
fq->nhoffset = nhoff;
fq->q.last_in |= INET_FRAG_FIRST_IN;
}
- write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
- write_unlock(&nf_frags.lock);
+
+ inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
@@ -366,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
}
/* Head of list must not be cloned. */
- if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+ if (skb_unclone(head, GFP_ATOMIC)) {
pr_debug("skb is cloned but can't expand head");
goto out_oom;
}
@@ -395,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -419,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- atomic_sub(head->truesize, &fq->q.net->mem);
+ sub_frag_mem_limit(&fq->q, head->truesize);
head->local_df = 1;
head->next = NULL;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cdd6d045e42e..aacd121fe8c5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,7 +19,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
{
u16 zone = NF_CT_DEFAULT_ZONE;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
@@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
{
struct sk_buff *reasm;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 5d6da784305b..61aaf70f376e 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
.manip_pkt = icmpv6_manip_pkt,
.in_range = icmpv6_in_range,
.unique_tuple = icmpv6_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
new file mode 100644
index 000000000000..c2e73e647e44
--- /dev/null
+++ b/net/ipv6/output_core.c
@@ -0,0 +1,76 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static. These functions are needed by GSO/GRO implementation.
+ */
+#include <linux/export.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+
+void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static atomic_t ipv6_fragmentation_id;
+ int old, new;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
+ struct inet_peer *peer;
+ struct net *net;
+
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ if (peer) {
+ fhdr->identification = htonl(inet_getid(peer, 0));
+ inet_putpeer(peer);
+ return;
+ }
+ }
+#endif
+ do {
+ old = atomic_read(&ipv6_fragmentation_id);
+ new = old + 1;
+ if (!new)
+ new = 1;
+ } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
+ fhdr->identification = htonl(new);
+}
+EXPORT_SYMBOL(ipv6_select_ident);
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb->tail - skb->network_header;
+ int found_rhdr = 0;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+#endif
+ if (found_rhdr)
+ return offset;
+ break;
+ default :
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ }
+
+ return offset;
+}
+EXPORT_SYMBOL(ip6_find_1stfragopt);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 745a32042950..bbbe53a99b57 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -295,11 +295,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_net_fops_create(net, "sockstat6", S_IRUGO,
- &sockstat6_seq_fops))
+ if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ &sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+ if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -308,17 +308,17 @@ static int __net_init ipv6_proc_init_net(struct net *net)
return 0;
proc_dev_snmp6_fail:
- proc_net_remove(net, "snmp6");
+ remove_proc_entry("snmp6", net->proc_net);
proc_snmp6_fail:
- proc_net_remove(net, "sockstat6");
+ remove_proc_entry("sockstat6", net->proc_net);
return -ENOMEM;
}
static void __net_exit ipv6_proc_exit_net(struct net *net)
{
- proc_net_remove(net, "sockstat6");
- proc_net_remove(net, "dev_snmp6");
- proc_net_remove(net, "snmp6");
+ remove_proc_entry("sockstat6", net->proc_net);
+ remove_proc_entry("dev_snmp6", net->proc_net);
+ remove_proc_entry("snmp6", net->proc_net);
}
static struct pernet_operations ipv6_proc_ops = {
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 053082dfc93e..22d1bd4670da 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,7 +25,9 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
+#if IS_ENABLED(CONFIG_IPV6)
const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
@@ -50,3 +52,26 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
return ret;
}
EXPORT_SYMBOL(inet6_del_protocol);
+#endif
+
+const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+
+int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_offload);
+
+int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ int ret;
+
+ ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ prot, NULL) == prot) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_del_offload);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d8e95c77db99..330b5e7b7df6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,7 +50,7 @@
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/tcp_states.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#include <linux/mroute6.h>
@@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif)
{
- struct hlist_node *node;
bool is_multicast = ipv6_addr_is_multicast(loc_addr);
- sk_for_each_from(sk, node)
+ sk_for_each_from(sk)
if (inet_sk(sk)->inet_num == num) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -123,7 +122,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
return 1;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
static mh_filter_t __rcu *mh_filter __read_mostly;
@@ -184,7 +183,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
filtered = icmpv6_filter(sk, skb);
break;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
{
/* XXX: To validate MH only once for each packet,
@@ -507,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sock_recv_ts_and_drops(msg, sk, skb);
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_ctl(sk, msg, skb);
err = copied;
if (flags & MSG_TRUNC)
@@ -822,8 +821,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
- err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1292,7 +1291,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
+ if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
@@ -1300,7 +1299,7 @@ static int __net_init raw6_init_net(struct net *net)
static void __net_exit raw6_exit_net(struct net *net)
{
- proc_net_remove(net, "raw6");
+ remove_proc_entry("raw6", net->proc_net);
}
static struct pernet_operations raw6_net_ops = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da8a4e301b1b..3c6a77290c6e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,20 +79,8 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
{
u32 c;
- c = jhash_3words((__force u32)saddr->s6_addr32[0],
- (__force u32)saddr->s6_addr32[1],
- (__force u32)saddr->s6_addr32[2],
- rnd);
-
- c = jhash_3words((__force u32)saddr->s6_addr32[3],
- (__force u32)daddr->s6_addr32[0],
- (__force u32)daddr->s6_addr32[1],
- c);
-
- c = jhash_3words((__force u32)daddr->s6_addr32[2],
- (__force u32)daddr->s6_addr32[3],
- (__force u32)id,
- c);
+ c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, rnd);
return c & (INETFRAGS_HASHSZ - 1);
}
@@ -327,7 +315,7 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -341,9 +329,7 @@ found:
fq->q.meat == fq->q.len)
return ip6_frag_reasm(fq, prev, dev);
- write_lock(&ip6_frags.lock);
- list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
- write_unlock(&ip6_frags.lock);
+ inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
@@ -406,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
goto out_oversize;
/* Head of list must not be cloned. */
- if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
/* If the first fragment is fragmented itself, we split
@@ -429,7 +415,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -467,7 +453,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
- atomic_sub(sum_truesize, &fq->q.net->mem);
+ sub_frag_mem_limit(&fq->q, sum_truesize);
head->next = NULL;
head->dev = dev;
@@ -616,6 +602,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[0].data = &net->ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[2].data = &net->ipv6.frags.timeout;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b1e6cf0b95fd..e5fe0041adfa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,7 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
+#include <net/nexthop.h>
#include <asm/uaccess.h>
@@ -144,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
struct neighbour *n;
daddr = choose_neigh_daddr(rt, skb, daddr);
- n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
+ n = __ipv6_neigh_lookup(dst->dev, daddr);
if (n)
return n;
return neigh_create(&nd_tbl, daddr, dst->dev);
}
-static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
-{
- struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
- if (!n) {
- n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
- }
- rt->n = n;
-
- return 0;
-}
-
static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.protocol = cpu_to_be16(ETH_P_IPV6),
@@ -289,6 +277,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
rt->rt6i_genid = rt_genid(net);
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ rt->rt6i_nsiblings = 0;
}
return rt;
}
@@ -297,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
-
- if (rt->n)
- neigh_release(rt->n);
+ struct dst_entry *from = dst->from;
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
@@ -309,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
- dst_release(dst->from);
+ dst->from = NULL;
+ dst_release(from);
if (rt6_has_peer(rt)) {
struct inet_peer *peer = rt6_peer_ptr(rt);
@@ -318,13 +306,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
}
}
-static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
-
-static u32 rt6_peer_genid(void)
-{
- return atomic_read(&__rt6_peer_genid);
-}
-
void rt6_bind_peer(struct rt6_info *rt, int create)
{
struct inet_peer_base *base;
@@ -338,8 +319,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
if (peer) {
if (!rt6_set_peer(rt, peer))
inet_putpeer(peer);
- else
- rt->rt6i_peer_genid = rt6_peer_genid();
}
}
@@ -360,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
in6_dev_put(idev);
}
}
- if (rt->n && rt->n->dev == dev) {
- rt->n->dev = loopback_dev;
- dev_hold(loopback_dev);
- dev_put(dev);
- }
}
}
@@ -385,6 +359,62 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+/* Multipath route selection:
+ * Hash-based function using packet header fields and the flow label.
+ * Adapted from fib_info_hashfn().
+ *
+ * @candidate_count: number of routes to choose from; used as the modulus,
+ *                   so it must not be 0.
+ * @fl6: flow whose protocol, addresses, ports / ICMPv6 type and code, and
+ *       flow label are mixed into the hash.
+ *
+ * Returns an index in the range [0, candidate_count).
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+ const struct flowi6 *fl6)
+{
+ unsigned int val = fl6->flowi6_proto;
+
+ val ^= ipv6_addr_hash(&fl6->daddr);
+ val ^= ipv6_addr_hash(&fl6->saddr);
+
+ /* Works only if the packet is not encapsulated */
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_UDP:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ val ^= (__force u16)fl6->fl6_sport;
+ val ^= (__force u16)fl6->fl6_dport;
+ break;
+
+ case IPPROTO_ICMPV6:
+ val ^= (__force u16)fl6->fl6_icmp_type;
+ val ^= (__force u16)fl6->fl6_icmp_code;
+ break;
+ }
+ /* RFC 6438 recommends using the flow label */
+ val ^= (__force u32)fl6->flowlabel;
+
+ /* Perhaps we need to tune this function? */
+ val = val ^ (val >> 7) ^ (val >> 12);
+ return val % candidate_count;
+}
+
+/*
+ * Choose one route among @match and its multipath siblings for flow @fl6.
+ * rt6_info_hash_nhsfn() maps the flow to an index in
+ * [0, match->rt6i_nsiblings]: index 0 keeps @match, index k > 0 selects the
+ * k-th entry of match->rt6i_siblings.
+ * Returns the selected route; @match is returned unchanged if the sibling
+ * list ends before the index is reached.
+ */
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+ struct flowi6 *fl6)
+{
+ struct rt6_info *sibling, *next_sibling;
+ int route_choosen;
+
+ route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+ /* Don't change the route if route_choosen == 0
+ * (the sibling list does not include @match itself).
+ */
+ /* NOTE(review): the _safe iterator is used although nothing is removed
+ * from the list inside the loop.
+ */
+ if (route_choosen)
+ list_for_each_entry_safe(sibling, next_sibling,
+ &match->rt6i_siblings, rt6i_siblings) {
+ route_choosen--;
+ if (route_choosen == 0) {
+ match = sibling;
+ break;
+ }
+ }
+ return match;
+}
+
/*
* Route lookup. Any table->tb6_lock is implied.
*/
@@ -448,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- neigh = rt ? rt->n : NULL;
- if (!neigh || (neigh->nud_state & NUD_VALID))
+ if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
return;
- read_lock_bh(&neigh->lock);
- if (!(neigh->nud_state & NUD_VALID) &&
+ rcu_read_lock_bh();
+ neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ if (neigh) {
+ write_lock(&neigh->lock);
+ if (neigh->nud_state & NUD_VALID)
+ goto out;
+ }
+
+ if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
struct in6_addr mcaddr;
struct in6_addr *target;
- neigh->updated = jiffies;
- read_unlock_bh(&neigh->lock);
+ if (neigh) {
+ neigh->updated = jiffies;
+ write_unlock(&neigh->lock);
+ }
- target = (struct in6_addr *)&neigh->primary_key;
+ target = (struct in6_addr *)&rt->rt6i_gateway;
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
} else {
- read_unlock_bh(&neigh->lock);
+out:
+ write_unlock(&neigh->lock);
}
+ rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -487,35 +527,36 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
return 0;
}
-static inline int rt6_check_neigh(struct rt6_info *rt)
+static inline bool rt6_check_neigh(struct rt6_info *rt)
{
struct neighbour *neigh;
- int m;
+ bool ret = false;
- neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
- m = 1;
- else if (neigh) {
- read_lock_bh(&neigh->lock);
+ return true;
+
+ rcu_read_lock_bh();
+ neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ if (neigh) {
+ read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
- m = 2;
+ ret = true;
#ifdef CONFIG_IPV6_ROUTER_PREF
- else if (neigh->nud_state & NUD_FAILED)
- m = 0;
+ else if (!(neigh->nud_state & NUD_FAILED))
+ ret = true;
#endif
- else
- m = 1;
- read_unlock_bh(&neigh->lock);
- } else
- m = 0;
- return m;
+ read_unlock(&neigh->lock);
+ }
+ rcu_read_unlock_bh();
+
+ return ret;
}
static int rt6_score_route(struct rt6_info *rt, int oif,
int strict)
{
- int m, n;
+ int m;
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
@@ -523,8 +564,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
- n = rt6_check_neigh(rt);
- if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
+ if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
return -1;
return m;
}
@@ -666,7 +706,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
else
rt6_set_expires(rt, jiffies + HZ * lifetime);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
return 0;
}
@@ -702,6 +742,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
restart:
rt = fn->leaf;
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
BACKTRACK(net, &fl6->saddr);
out:
dst_use(&rt->dst, jiffies);
@@ -783,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
rt = ip6_rt_copy(ort, daddr);
if (rt) {
- int attempts = !in_softirq();
-
if (!(rt->rt6i_flags & RTF_GATEWAY)) {
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
@@ -800,32 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
rt->rt6i_src.plen = 128;
}
#endif
-
- retry:
- if (rt6_bind_neighbour(rt, rt->dst.dev)) {
- struct net *net = dev_net(rt->dst.dev);
- int saved_rt_min_interval =
- net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int saved_rt_elasticity =
- net->ipv6.sysctl.ip6_rt_gc_elasticity;
-
- if (attempts-- > 0) {
- net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
- net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
-
- ip6_dst_gc(&net->ipv6.ip6_dst_ops);
-
- net->ipv6.sysctl.ip6_rt_gc_elasticity =
- saved_rt_elasticity;
- net->ipv6.sysctl.ip6_rt_gc_min_interval =
- saved_rt_min_interval;
- goto retry;
- }
-
- net_warn_ratelimited("Neighbour table overflow\n");
- dst_free(&rt->dst);
- return NULL;
- }
}
return rt;
@@ -836,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
{
struct rt6_info *rt = ip6_rt_copy(ort, daddr);
- if (rt) {
+ if (rt)
rt->rt6i_flags |= RTF_CACHE;
- rt->n = neigh_clone(ort->n);
- }
return rt;
}
@@ -863,7 +875,8 @@ restart_2:
restart:
rt = rt6_select(fn, oif, strict | reachable);
-
+ if (rt->rt6i_nsiblings && oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
BACKTRACK(net, &fl6->saddr);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -872,14 +885,14 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
else
goto out2;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
@@ -896,7 +909,7 @@ restart:
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
goto relookup;
out:
@@ -938,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+ .flowlabel = ip6_flowinfo(iph),
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
@@ -998,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt->rt6i_gateway = ort->rt6i_gateway;
rt->rt6i_flags = ort->rt6i_flags;
- rt6_clean_expires(rt);
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1030,14 +1042,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
return NULL;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
- if (rt->rt6i_peer_genid != rt6_peer_genid()) {
- if (!rt6_has_peer(rt))
- rt6_bind_peer(rt, 0);
- rt->rt6i_peer_genid = rt6_peer_genid();
- }
+ if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
return dst;
- }
+
return NULL;
}
@@ -1108,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
- fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+ fl6.flowlabel = ip6_flowinfo(iph);
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -1136,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
- fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+ fl6.flowlabel = ip6_flowinfo(iph);
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -1196,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
- struct neighbour *neigh,
struct flowi6 *fl6)
{
struct dst_entry *dst;
@@ -1214,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
goto out;
}
- if (neigh)
- neigh_hold(neigh);
- else {
- neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
- if (IS_ERR(neigh)) {
- in6_dev_put(idev);
- dst_free(&rt->dst);
- return ERR_CAST(neigh);
- }
- }
-
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- rt->n = neigh;
atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
@@ -1316,12 +1310,6 @@ out:
return entries > rt_max_size;
}
-/* Clean host part of a prefix. Not necessary in radix tree,
- but results in cleaner routing tables.
-
- Remove it only when all the things will work!
- */
-
int ip6_dst_hoplimit(struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
@@ -1507,7 +1495,7 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
if (dev) {
if (dev != grt->dst.dev) {
- dst_release(&grt->dst);
+ ip6_rt_put(grt);
goto out;
}
} else {
@@ -1518,7 +1506,7 @@ int ip6_route_add(struct fib6_config *cfg)
}
if (!(grt->rt6i_flags & RTF_GATEWAY))
err = 0;
- dst_release(&grt->dst);
+ ip6_rt_put(grt);
if (err)
goto out;
@@ -1542,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg)
} else
rt->rt6i_prefsrc.plen = 0;
- if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
- err = rt6_bind_neighbour(rt, dev);
- if (err)
- goto out;
- }
-
rt->rt6i_flags = cfg->fc_flags;
install_route:
@@ -1604,7 +1586,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
write_unlock_bh(&table->tb6_lock);
out:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return err;
}
@@ -1660,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
struct net *net = dev_net(skb->dev);
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
- const struct in6_addr *target;
struct ndisc_options ndopts;
- const struct in6_addr *dest;
- struct neighbour *old_neigh;
struct inet6_dev *in6_dev;
struct neighbour *neigh;
- struct icmp6hdr *icmph;
+ struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
optlen = skb->tail - skb->transport_header;
- optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ optlen -= sizeof(*msg);
if (optlen < 0) {
net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
return;
}
- icmph = icmp6_hdr(skb);
- target = (const struct in6_addr *) (icmph + 1);
- dest = target + 1;
+ msg = (struct rd_msg *)icmp6_hdr(skb);
- if (ipv6_addr_is_multicast(dest)) {
+ if (ipv6_addr_is_multicast(&msg->dest)) {
net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
return;
}
on_link = 0;
- if (ipv6_addr_equal(dest, target)) {
+ if (ipv6_addr_equal(&msg->dest, &msg->target)) {
on_link = 1;
- } else if (ipv6_addr_type(target) !=
+ } else if (ipv6_addr_type(&msg->target) !=
(IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
return;
@@ -1707,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* first-hop router for the specified ICMP Destination Address.
*/
- if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+ if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
return;
}
@@ -1734,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
*/
dst_confirm(&rt->dst);
- neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
if (!neigh)
return;
- /* Duplicate redirect: silently ignore. */
- old_neigh = rt->n;
- if (neigh == old_neigh)
- goto out;
-
/*
* We have finally decided to accept it.
*/
@@ -1754,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NEIGH_UPDATE_F_ISROUTER))
);
- nrt = ip6_rt_copy(rt, dest);
+ nrt = ip6_rt_copy(rt, &msg->dest);
if (!nrt)
goto out;
@@ -1763,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- nrt->n = neigh_clone(neigh);
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
- netevent.old_neigh = old_neigh;
netevent.new = &nrt->dst;
- netevent.new_neigh = neigh;
- netevent.daddr = dest;
+ netevent.daddr = &msg->dest;
+ netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
if (rt->rt6i_flags & RTF_CACHE) {
@@ -1814,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
(RTF_DEFAULT | RTF_ADDRCONF))
rt6_set_from(rt, ort);
- else
- rt6_clean_expires(rt);
rt->rt6i_metric = 0;
#ifdef CONFIG_IPV6_SUBTREES
@@ -1947,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net)
restart:
read_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+ (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
ip6_del_rt(rt);
@@ -1987,7 +1956,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch(cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = copy_from_user(&rtmsg, arg,
sizeof(struct in6_rtmsg));
@@ -2078,7 +2047,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
- int err;
if (!rt) {
net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
@@ -2097,11 +2065,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_flags |= RTF_ANYCAST;
else
rt->rt6i_flags |= RTF_LOCAL;
- err = rt6_bind_neighbour(rt, rt->dst.dev);
- if (err) {
- dst_free(&rt->dst);
- return ERR_PTR(err);
- }
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
@@ -2249,6 +2212,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2326,11 +2290,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_TABLE])
cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+ if (tb[RTA_MULTIPATH]) {
+ cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+ cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+ }
+
err = 0;
errout:
return err;
}
+/*
+ * Add (@add != 0) or delete (@add == 0) every nexthop listed in the
+ * RTA_MULTIPATH payload of @cfg (cfg->fc_mp / cfg->fc_mp_len).
+ *
+ * Each rtnexthop entry is turned into a private copy of @cfg, with the
+ * interface index and (if present) the RTA_GATEWAY of that entry filled in,
+ * and handed to ip6_route_add() or ip6_route_del() as an ordinary route.
+ *
+ * If an add fails, the nexthops that this call already installed are removed
+ * again.  Only those are rolled back: the failing entry and the ones after
+ * it were never added by this call, so deleting them could remove routes
+ * that existed before the request.
+ *
+ * Returns 0 on success.  For a failed add, returns the error of the add that
+ * failed (not an error produced by the rollback).  For a delete, returns the
+ * last error encountered; all nexthops are still attempted.
+ */
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+	struct fib6_config r_cfg;
+	struct rtnexthop *rtnh;
+	int remaining;
+	int attrlen;
+	int err = 0, last_err = 0;
+	int add_err = 0;	/* error that aborted an add request */
+	int nr_added = 0;	/* nexthops installed by this call */
+	int nr_undo = -1;	/* nexthops left to roll back; < 0: no limit */
+
+beginning:
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
+
+	/* Parse a Multipath Entry */
+	while (nr_undo != 0 && rtnh_ok(rtnh, remaining)) {
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		err = 0;
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				/* nla_memcpy() copies at most the attribute's
+				 * own length, so a short RTA_GATEWAY would
+				 * leave part of the gateway inherited from
+				 * @cfg.  Reject it instead.
+				 */
+				if (nla_len(nla) < (int)sizeof(r_cfg.fc_gateway)) {
+					err = -EINVAL;
+				} else {
+					nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+					r_cfg.fc_flags |= RTF_GATEWAY;
+				}
+			}
+		}
+		if (!err)
+			err = add ? ip6_route_add(&r_cfg) :
+				    ip6_route_del(&r_cfg);
+		if (err) {
+			last_err = err;
+			/* If we are trying to remove a route, do not stop the
+			 * loop when ip6_route_del() fails (because the next
+			 * hop is already gone); we should try to remove all
+			 * next hops.
+			 */
+			if (add) {
+				/* If add fails, delete the next hops that
+				 * this call has already added, and remember
+				 * the add error so the rollback cannot mask
+				 * it.
+				 */
+				add_err = err;
+				nr_undo = nr_added;
+				add = 0;
+				goto beginning;
+			}
+		} else if (add) {
+			nr_added++;
+		}
+		if (nr_undo > 0)
+			nr_undo--;
+		/* Because each route is added like a single route, we remove
+		 * this flag after the first nexthop (if there is a collision,
+		 * we have already failed to add the first nexthop:
+		 * fib6_add_rt2node() has rejected it).
+		 */
+		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	return add_err ? add_err : last_err;
+}
+
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
@@ -2340,7 +2364,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_del(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 0);
+ else
+ return ip6_route_del(&cfg);
}
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2352,7 +2379,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_add(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 1);
+ else
+ return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(void)
@@ -2380,7 +2410,6 @@ static int rt6_fill_node(struct net *net,
struct nlmsghdr *nlh;
long expires;
u32 table;
- struct neighbour *n;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2493,9 +2522,8 @@ static int rt6_fill_node(struct net *net,
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
- n = rt->n;
- if (n) {
- if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
+ if (rt->rt6i_flags & RTF_GATEWAY) {
+ if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
goto nla_put_failure;
}
@@ -2596,7 +2624,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
err = -ENOBUFS;
goto errout;
}
@@ -2690,7 +2718,6 @@ struct rt6_proc_arg
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
struct seq_file *m = p_arg;
- struct neighbour *n;
seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
@@ -2699,9 +2726,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
#else
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
- n = rt->n;
- if (n) {
- seq_printf(m, "%pi6", n->primary_key);
+ if (rt->rt6i_flags & RTF_GATEWAY) {
+ seq_printf(m, "%pi6", &rt->rt6i_gateway);
} else {
seq_puts(m, "00000000000000000000000000000000");
}
@@ -2873,6 +2899,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
return table;
@@ -2964,8 +2994,8 @@ static void __net_exit ip6_route_net_exit(struct net *net)
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
- proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+ proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
+ proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
return 0;
}
@@ -2973,8 +3003,8 @@ static int __net_init ip6_route_net_init_late(struct net *net)
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_remove(net, "ipv6_route");
- proc_net_remove(net, "rt6_stats");
+ remove_proc_entry("ipv6_route", net->proc_net);
+ remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3ed54ffd8d50..02f96dcbcf02 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -65,9 +65,16 @@
#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);
static void ipip6_dev_free(struct net_device *dev);
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+ __be32 *v4dst);
+static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
@@ -80,22 +87,6 @@ struct sit_net {
struct net_device *fb_tunnel_dev;
};
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
-
static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
@@ -121,6 +112,7 @@ static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
}
tot->rx_errors = dev->stats.rx_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
@@ -141,20 +133,20 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
- for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
@@ -231,6 +223,37 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#endif
}
+/*
+ * Finish setting up and register the sit tunnel device @dev.
+ * The tunnel parameters in netdev_priv(dev)->parms are read here, so the
+ * caller has to fill them in first (ipip6_tunnel_locate() does so before
+ * calling this function).
+ * Returns 0 on success or the negative error from ipip6_tunnel_init() or
+ * register_netdevice(); nothing is undone here on failure.
+ */
+static int ipip6_tunnel_create(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ int err;
+
+ err = ipip6_tunnel_init(dev);
+ if (err < 0)
+ goto out;
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ /* Mirror the ISATAP request from the tunnel parameters onto the device */
+ if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ dev->priv_flags |= IFF_ISATAP;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ /* Record the device's name as it stands after registration */
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &sit_link_ops;
+
+ /* Take a device reference before the tunnel is linked into sitn */
+ dev_hold(dev);
+
+ ipip6_tunnel_link(sitn, t);
+ return 0;
+
+out:
+ return err;
+}
+
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
@@ -271,21 +294,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
nt->parms = *parms;
- if (ipip6_tunnel_init(dev) < 0)
- goto failed_free;
- ipip6_tunnel_clone_6rd(dev, sitn);
-
- if (parms->i_flags & SIT_ISATAP)
- dev->priv_flags |= IFF_ISATAP;
-
- if (register_netdevice(dev) < 0)
+ if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
- strcpy(nt->parms.name, dev->name);
-
- dev_hold(dev);
-
- ipip6_tunnel_link(sitn, nt);
return nt;
failed_free:
@@ -581,21 +592,20 @@ out:
return err;
}
-static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
+static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
+ const struct in6_addr *v6addr)
{
- if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(ipv6_hdr(skb));
+ __be32 v4embed = 0;
+ if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
+ return true;
+ return false;
}
static int ipip6_rcv(struct sk_buff *skb)
{
- const struct iphdr *iph;
+ const struct iphdr *iph = ip_hdr(skb);
struct ip_tunnel *tunnel;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
-
- iph = ip_hdr(skb);
+ int err;
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
@@ -609,21 +619,39 @@ static int ipip6_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
- if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
- !isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- kfree_skb(skb);
- return 0;
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
+ }
+ } else {
+ if (is_spoofed_6rd(tunnel, iph->saddr,
+ &ipv6_hdr(skb)->saddr) ||
+ is_spoofed_6rd(tunnel, iph->daddr,
+ &ipv6_hdr(skb)->daddr)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
+ }
+ }
+
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto out;
+ }
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
- __skb_tunnel_rx(skb, tunnel->dev);
-
- ipip6_ecn_decapsulate(iph, skb);
-
netif_rx(skb);
return 0;
@@ -637,14 +665,12 @@ out:
}
/*
- * Returns the embedded IPv4 address if the IPv6 address
- * comes from 6rd / 6to4 (RFC 3056) addr space.
+ * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
+ * stores the embedded IPv4 address in v4dst and returns true.
*/
-static inline
-__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+ __be32 *v4dst)
{
- __be32 dst = 0;
-
#ifdef CONFIG_IPV6_SIT_6RD
if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
tunnel->ip6rd.prefixlen)) {
@@ -663,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
(32 - pbi1);
- dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ *v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ return true;
}
#else
if (v6dst->s6_addr16[0] == htons(0x2002)) {
/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
- memcpy(&dst, &v6dst->s6_addr16[1], 4);
+ memcpy(v4dst, &v6dst->s6_addr16[1], 4);
+ return true;
}
#endif
+ return false;
+}
+
+static inline __be32 try_6rd(struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ __be32 dst = 0;
+ check_6rd(tunnel, v6dst, &dst);
return dst;
}
@@ -683,7 +719,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
const struct iphdr *tiph = &tunnel->parms.iph;
const struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
@@ -732,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (!dst)
- dst = try_6rd(&iph6->daddr, tunnel);
+ dst = try_6rd(tunnel, &iph6->daddr);
if (!dst) {
struct neighbour *neigh = NULL;
@@ -864,9 +899,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if ((iph->ttl = tiph->ttl) == 0)
iph->ttl = iph6->hop_limit;
- nf_reset(skb);
- tstats = this_cpu_ptr(dev->tstats);
- __IPTUNNEL_XMIT(tstats, &dev->stats);
+ iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
tx_error_icmp:
@@ -914,6 +947,59 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->iflink = tunnel->parms.link;
}
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
+ t->parms.iph.saddr = p->iph.saddr;
+ t->parms.iph.daddr = p->iph.daddr;
+ memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+ memcpy(t->dev->broadcast, &p->iph.daddr, 4);
+ ipip6_tunnel_link(sitn, t);
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
+ if (t->parms.link != p->link) {
+ t->parms.link = p->link;
+ ipip6_tunnel_bind_dev(t->dev);
+ }
+ netdev_state_change(t->dev);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
+ struct ip_tunnel_6rd *ip6rd)
+{
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+
+ if (ip6rd->relay_prefixlen > 32 ||
+ ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
+ return -EINVAL;
+
+ ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
+ if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
+ return -EINVAL;
+ if (ip6rd->relay_prefixlen)
+ relay_prefix = ip6rd->relay_prefix &
+ htonl(0xffffffffUL <<
+ (32 - ip6rd->relay_prefixlen));
+ else
+ relay_prefix = 0;
+ if (relay_prefix != ip6rd->relay_prefix)
+ return -EINVAL;
+
+ t->ip6rd.prefix = prefix;
+ t->ip6rd.relay_prefix = relay_prefix;
+ t->ip6rd.prefixlen = ip6rd->prefixlen;
+ t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ netdev_state_change(t->dev);
+ return 0;
+}
+#endif
+
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -966,7 +1052,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -995,28 +1081,13 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip6_tunnel_unlink(sitn, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip6_tunnel_link(sitn, t);
- netdev_state_change(dev);
}
+
+ ipip6_tunnel_update(t, &p);
}
if (t) {
err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- ipip6_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
err = -EFAULT;
} else
@@ -1025,7 +1096,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == sitn->fb_tunnel_dev) {
@@ -1058,7 +1129,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELPRL:
case SIOCCHGPRL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
@@ -1087,7 +1158,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCCHG6RD:
case SIOCDEL6RD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1098,31 +1169,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
if (cmd != SIOCDEL6RD) {
- struct in6_addr prefix;
- __be32 relay_prefix;
-
- err = -EINVAL;
- if (ip6rd.relay_prefixlen > 32 ||
- ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
- goto done;
-
- ipv6_addr_prefix(&prefix, &ip6rd.prefix,
- ip6rd.prefixlen);
- if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+ err = ipip6_tunnel_update_6rd(t, &ip6rd);
+ if (err < 0)
goto done;
- if (ip6rd.relay_prefixlen)
- relay_prefix = ip6rd.relay_prefix &
- htonl(0xffffffffUL <<
- (32 - ip6rd.relay_prefixlen));
- else
- relay_prefix = 0;
- if (relay_prefix != ip6rd.relay_prefix)
- goto done;
-
- t->ip6rd.prefix = prefix;
- t->ip6rd.relay_prefix = relay_prefix;
- t->ip6rd.prefixlen = ip6rd.prefixlen;
- t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
} else
ipip6_tunnel_clone_6rd(dev, sitn);
@@ -1216,6 +1265,239 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
return 0;
}
+static void ipip6_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ parms->iph.version = 4;
+ parms->iph.protocol = IPPROTO_IPV6;
+ parms->iph.ihl = 5;
+ parms->iph.ttl = 64;
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+ if (data[IFLA_IPTUN_REMOTE])
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+ if (data[IFLA_IPTUN_TTL]) {
+ parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+ if (parms->iph.ttl)
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_IPTUN_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+ if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+ parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+/* This function returns true when 6RD attributes are present in the nl msg */
+static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
+ struct ip_tunnel_6rd *ip6rd)
+{
+ bool ret = false;
+ memset(ip6rd, 0, sizeof(*ip6rd));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_6RD_PREFIX]) {
+ ret = true;
+ nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
+ sizeof(struct in6_addr));
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
+ ret = true;
+ ip6rd->relay_prefix =
+ nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
+ ret = true;
+ ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
+ ret = true;
+ ip6rd->relay_prefixlen =
+ nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
+ }
+
+ return ret;
+}
+#endif
+
+static int ipip6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *nt;
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+ int err;
+
+ nt = netdev_priv(dev);
+ ipip6_netlink_parms(data, &nt->parms);
+
+ if (ipip6_tunnel_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ err = ipip6_tunnel_create(dev);
+ if (err < 0)
+ return err;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+#endif
+
+ return err;
+}
+
+static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip_tunnel *t;
+ struct ip_tunnel_parm p;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+
+ if (dev == sitn->fb_tunnel_dev)
+ return -EINVAL;
+
+ ipip6_netlink_parms(data, &p);
+
+ if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+ (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+ return -EINVAL;
+
+ t = ipip6_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ ipip6_tunnel_update(t, &p);
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ return ipip6_tunnel_update_6rd(t, &ip6rd);
+#endif
+
+ return 0;
+}
+
+static size_t ipip6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_TOS */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_PMTUDISC */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_FLAGS */
+ nla_total_size(2) +
+#ifdef CONFIG_IPV6_SIT_6RD
+ /* IFLA_IPTUN_6RD_PREFIX */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIX */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_6RD_PREFIXLEN */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
+ nla_total_size(2) +
+#endif
+ 0;
+}
+
+static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+ nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
+ nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
+ !!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ goto nla_put_failure;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
+ &tunnel->ip6rd.prefix) ||
+ nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
+ tunnel->ip6rd.relay_prefix) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
+ tunnel->ip6rd.prefixlen) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ tunnel->ip6rd.relay_prefixlen))
+ goto nla_put_failure;
+#endif
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
+ [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
+#ifdef CONFIG_IPV6_SIT_6RD
+ [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
+ [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
+#endif
+};
+
+static struct rtnl_link_ops sit_link_ops __read_mostly = {
+ .kind = "sit",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ipip6_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = ipip6_tunnel_setup,
+ .newlink = ipip6_newlink,
+ .changelink = ipip6_changelink,
+ .get_size = ipip6_get_size,
+ .fill_info = ipip6_fill_info,
+};
+
static struct xfrm_tunnel sit_handler __read_mostly = {
.handler = ipip6_rcv,
.err_handler = ipip6_err,
@@ -1302,6 +1584,7 @@ static struct pernet_operations sit_net_ops = {
static void __exit sit_cleanup(void)
{
+ rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
unregister_pernet_device(&sit_net_ops);
@@ -1319,10 +1602,21 @@ static int __init sit_init(void)
return err;
err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
if (err < 0) {
- unregister_pernet_device(&sit_net_ops);
pr_info("%s: can't add protocol\n", __func__);
+ goto xfrm_tunnel_failed;
}
+ err = rtnl_link_register(&sit_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+out:
return err;
+
+rtnl_link_failed:
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+xfrm_tunnel_failed:
+ unregister_pernet_device(&sit_net_ops);
+ goto out;
}
module_init(sit_init);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 182ab9a85d6c..8a0848b60b35 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -179,7 +179,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
- if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+ if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
goto out;
ret = NULL;
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq6->iif = inet6_iif(skb);
req->expires = 0UL;
- req->retrans = 0;
+ req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 26175bffbaa0..9b6460055df5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -77,9 +77,6 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static void __tcp_v6_send_check(struct sk_buff *skb,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -119,14 +116,6 @@ static void tcp_v6_hash(struct sock *sk)
}
}
-static __inline__ __sum16 tcp_v6_check(int len,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __wsum base)
-{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
-}
-
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
@@ -306,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- if (!tp->write_seq)
+ if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
np->daddr.s6_addr32,
inet->inet_sport,
@@ -434,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
goto out;
case TCP_SYN_SENT:
@@ -495,9 +485,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
struct flowi6 fl6;
+ int res;
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+ if (!res)
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ return res;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -719,96 +712,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
};
#endif
-static void __tcp_v6_send_check(struct sk_buff *skb,
- const struct in6_addr *saddr, const struct in6_addr *daddr)
-{
- struct tcphdr *th = tcp_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v6_check(skb->len, saddr, daddr,
- csum_partial(th, th->doff << 2,
- skb->csum));
- }
-}
-
-static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
-}
-
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
- return 0;
-}
-
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- const struct ipv6hdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
- __sum16 sum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
- skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-flush:
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
-
- case CHECKSUM_NONE:
- wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
- skb_gro_len(skb),
- IPPROTO_TCP, 0));
- sum = csum_fold(skb_checksum(skb,
- skb_gro_offset(skb),
- skb_gro_len(skb),
- wsum));
- if (sum)
- goto flush;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
- return tcp_gro_receive(head, skb);
-}
-
-static int tcp6_gro_complete(struct sk_buff *skb)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
- th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
- &iph->saddr, &iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-
- return tcp_gro_complete(skb);
-}
-
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
+ u32 tsval, u32 tsecr,
+ struct tcp_md5sig_key *key, int rst, u8 tclass)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -820,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
struct dst_entry *dst;
__be32 *topt;
- if (ts)
+ if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
if (key)
@@ -850,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
topt = (__be32 *)(t1 + 1);
- if (ts) {
+ if (tsecr) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *topt++ = htonl(tcp_time_stamp);
- *topt++ = htonl(ts);
+ *topt++ = htonl(tsval);
+ *topt++ = htonl(tsecr);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -930,7 +836,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->daddr,
+ &tcp_hashinfo, &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
ntohs(th->source), inet6_iif(skb));
if (!sk1)
return;
@@ -954,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
#ifdef CONFIG_TCP_MD5SIG
release_sk1:
@@ -965,10 +872,11 @@ release_sk1:
#endif
}
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 tsval, u32 tsecr,
struct tcp_md5sig_key *key, u8 tclass)
{
- tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -978,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
tw->tw_tclass);
@@ -987,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
+ tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
}
@@ -1054,8 +964,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
+ }
req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
if (req == NULL)
@@ -1123,7 +1035,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->rmt_addr = ipv6_hdr(skb)->saddr;
treq->loc_addr = ipv6_hdr(skb)->daddr;
if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb);
+ TCP_ECN_create_request(req, skb, sock_net(sk));
treq->iif = sk->sk_bound_dev_if;
@@ -1204,6 +1116,7 @@ drop_and_release:
drop_and_free:
reqsk_free(req);
drop:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0; /* don't send reset */
}
@@ -1259,7 +1172,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+ newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1339,7 +1252,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+ newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
/* Clone native IPv6 options from listening socket (if any)
@@ -1364,7 +1277,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->retrans;
+ newtp->total_retrans = req->num_retrans;
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1384,7 +1297,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#endif
if (__inet_inherit_port(sk, newsk) < 0) {
- sock_put(newsk);
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
goto out;
}
__inet6_hash(newsk, NULL);
@@ -1551,7 +1465,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxtclass)
- np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+ np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1693,6 +1607,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ &ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (sk2 != NULL) {
@@ -1741,11 +1656,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
struct dst_entry *dst = sk->sk_rx_dst;
- struct inet_sock *icsk = inet_sk(sk);
+
if (dst)
dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
if (dst &&
- icsk->rx_dst_ifindex == skb->skb_iif)
+ inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1866,7 +1781,7 @@ static void get_openreq6(struct seq_file *seq,
0,0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
- req->retrans,
+ req->num_timeout,
from_kuid_munged(seq_user_ns(seq), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
@@ -2063,10 +1978,6 @@ static const struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
- .gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -2121,10 +2032,10 @@ int __init tcpv6_init(void)
out:
return ret;
-out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
+out_tcpv6_protocol:
+ inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
goto out;
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
new file mode 100644
index 000000000000..2ec6bf6a0aa0
--- /dev/null
+++ b/net/ipv6/tcpv6_offload.c
@@ -0,0 +1,95 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * TCPv6 GSO/GRO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ return 0;
+}
+
+static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+ __sum16 sum;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+
+ case CHECKSUM_NONE:
+ wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+ skb_gro_len(skb),
+ IPPROTO_TCP, 0));
+ sum = csum_fold(skb_checksum(skb,
+ skb_gro_offset(skb),
+ skb_gro_len(skb),
+ wsum));
+ if (sum)
+ goto flush;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ return tcp_gro_receive(head, skb);
+}
+
+static int tcp6_gro_complete(struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
+ &iph->saddr, &iph->daddr, 0);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv6_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v6_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+};
+
+int __init tcpv6_offload_init(void)
+{
+ return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fc9997260a6b..599e1ba6d1ce 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,6 +45,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/inet6_hashtables.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
- int score, badness;
+ int score, badness, matches = 0, reuseport = 0;
+ u32 hash = 0;
begin:
result = NULL;
@@ -214,8 +216,18 @@ begin:
if (score > badness) {
result = sk;
badness = score;
- if (score == SCORE2_MAX)
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ hash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ } else if (score == SCORE2_MAX)
goto exact_match;
+ } else if (score == badness && reuseport) {
+ matches++;
+ if (((u64)hash * matches) >> 32 == 0)
+ result = sk;
+ hash = next_pseudo_random32(hash);
}
}
/*
@@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
- int score, badness;
+ int score, badness, matches = 0, reuseport = 0;
+ u32 hash = 0;
rcu_read_lock();
if (hslot->count > 10) {
@@ -284,6 +297,17 @@ begin:
if (score > badness) {
result = sk;
badness = score;
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ hash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ }
+ } else if (score == badness && reuseport) {
+ matches++;
+ if (((u64)hash * matches) >> 32 == 0)
+ result = sk;
+ hash = next_pseudo_random32(hash);
}
}
/*
@@ -443,7 +467,7 @@ try_again:
ip_cmsg_recv(msg, skb);
} else {
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_ctl(sk, msg, skb);
}
err = copied;
@@ -752,40 +776,6 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
return 0;
}
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
- int proto)
-{
- int err;
-
- UDP_SKB_CB(skb)->partial_cov = 0;
- UDP_SKB_CB(skb)->cscov = skb->len;
-
- if (proto == IPPROTO_UDPLITE) {
- err = udplite_checksum_init(skb, uh);
- if (err)
- return err;
- }
-
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- return 1;
- }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
- skb->len, proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- if (!skb_csum_unnecessary(skb))
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, proto, 0));
-
- return 0;
-}
-
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
@@ -1153,8 +1143,8 @@ do_udp_sendmsg:
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
- err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1343,103 +1333,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
-
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
- return 0;
-}
-
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- unsigned int unfrag_ip6hlen, unfrag_len;
- struct frag_hdr *fptr;
- u8 *mac_start, *prevhdr;
- u8 nexthdr;
- u8 frag_hdr_sz = sizeof(struct frag_hdr);
- int offset;
- __wsum csum;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
- int type = skb_shinfo(skb)->gso_type;
-
- if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
- !(type & (SKB_GSO_UDP))))
- goto out;
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Check if there is enough headroom to insert fragment header. */
- if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
- goto out;
-
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
- unfrag_ip6hlen;
- mac_start = skb_mac_header(skb);
- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
-
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
-
-out:
- return segs;
-}
-
static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
- .gso_send_check = udp6_ufo_send_check,
- .gso_segment = udp6_ufo_fragment,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
new file mode 100644
index 000000000000..cf05cf073c51
--- /dev/null
+++ b/net/ipv6/udp_offload.c
@@ -0,0 +1,121 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * UDPv6 GSO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int udp6_ufo_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(*uh)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
+
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ unsigned int unfrag_ip6hlen, unfrag_len;
+ struct frag_hdr *fptr;
+ u8 *mac_start, *prevhdr;
+ u8 nexthdr;
+ u8 frag_hdr_sz = sizeof(struct frag_hdr);
+ int offset;
+ __wsum csum;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+ SKB_GSO_GRE) ||
+ !(type & (SKB_GSO_UDP))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
+ pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
+ goto out;
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
+ unfrag_ip6hlen;
+ mac_start = skb_mac_header(skb);
+ memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+
+out:
+ return segs;
+}
+static const struct net_offload udpv6_offload = {
+ .callbacks = {
+ .gso_send_check = udp6_ufo_send_check,
+ .gso_segment = udp6_ufo_fragment,
+ },
+};
+
+int __init udp_offload_init(void)
+{
+ return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b19ad0..9bf6a74a71d2 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -69,8 +69,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- if (skb_cloned(skb) &&
- (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f8c4c08ffb60..4ef7bdb65440 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
- xdst->u.rt6.n = neigh_clone(rt->n);
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
@@ -182,7 +181,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
fl6->flowi6_proto = nexthdr;
return;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
@@ -321,27 +320,57 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static struct ctl_table_header *sysctl_hdr;
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = xfrm6_policy_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+
+ table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh;
+ }
+
+ hdr = register_net_sysctl(net, "net/ipv6", table);
+ if (!hdr)
+ goto err_reg;
+
+ net->ipv6.sysctl.xfrm6_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ if (net->ipv6.sysctl.xfrm6_hdr == NULL)
+ return;
+
+ table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct pernet_operations xfrm6_net_ops = {
+ .init = xfrm6_net_init,
+ .exit = xfrm6_net_exit,
+};
#endif
int __init xfrm6_init(void)
{
int ret;
- unsigned int gc_thresh;
-
- /*
- * We need a good default value for the xfrm6 gc threshold.
- * In ipv4 we set it to the route hash table size * 8, which
- * is half the size of the maximaum route cache for ipv4. It
- * would be good to do the same thing for v6, except the table is
- * constructed differently here. Here each table for a net namespace
- * can have FIB_TABLE_HASHSZ entries, so lets go with the same
- * computation that we used for ipv4 here. Also, lets keep the initial
- * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
- * to that as a minimum as well
- */
- gc_thresh = FIB6_TABLE_HASHSZ * 8;
- xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+
dst_entries_init(&xfrm6_dst_ops);
ret = xfrm6_policy_init();
@@ -354,8 +383,7 @@ int __init xfrm6_init(void)
goto out_policy;
#ifdef CONFIG_SYSCTL
- sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6",
- xfrm6_policy_table);
+ register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
@@ -367,10 +395,8 @@ out_policy:
void xfrm6_fini(void)
{
#ifdef CONFIG_SYSCTL
- if (sysctl_hdr)
- unregister_net_sysctl_table(sysctl_hdr);
+ unregister_pernet_subsys(&xfrm6_net_ops);
#endif
- //xfrm6_input_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 3f2f7c4ab721..d8c70b8efc24 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p)
return 1;
else
return 3;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
@@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p)
switch (v->mode) {
case XFRM_MODE_TRANSPORT:
return 1;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ee5a7065aacc..de2bcfaaf759 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad
{
unsigned int h;
- h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
+ h = ipv6_addr_hash((const struct in6_addr *)addr);
h ^= h >> 16;
h ^= h >> 8;
h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
@@ -89,12 +89,11 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const
{
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
- struct hlist_node *pos;
- hlist_for_each_entry_rcu(x6spi, pos,
+ hlist_for_each_entry_rcu(x6spi,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr) {
- if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
+ if (xfrm6_addr_equal(&x6spi->addr, saddr))
return x6spi;
}
@@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
int index = xfrm6_tunnel_spi_hash_byspi(spi);
- struct hlist_node *pos;
- hlist_for_each_entry(x6spi, pos,
+ hlist_for_each_entry(x6spi,
&xfrm6_tn->spi_byspi[index],
list_byspi) {
if (x6spi->spi == spi)
@@ -203,15 +201,15 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
{
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
spin_lock_bh(&xfrm6_tunnel_spi_lock);
- hlist_for_each_entry_safe(x6spi, pos, n,
+ hlist_for_each_entry_safe(x6spi, n,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr)
{
- if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+ if (xfrm6_addr_equal(&x6spi->addr, saddr)) {
if (atomic_dec_and_test(&x6spi->refcnt)) {
hlist_del_rcu(&x6spi->list_byaddr);
hlist_del_rcu(&x6spi->list_byspi);