summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
commit6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree74d1ec65087df1da1021b43ac51acc1ee8601809 /net/core
parentbb0fd7ab0986105765d11baa82e619c618a235aa (diff)
parent9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add BQL support to via-rhine, from Tino Reichardt. 2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers can support hw switch offloading. From Floria Fainelli. 3) Allow 'ip address' commands to initiate multicast group join/leave, from Madhu Challa. 4) Many ipv4 FIB lookup optimizations from Alexander Duyck. 5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel Borkmann. 6) Remove the ugly compat support in ARP for ugly layers like ax25, rose, etc. And use this to clean up the neigh layer, then use it to implement MPLS support. All from Eric Biederman. 7) Support L3 forwarding offloading in switches, from Scott Feldman. 8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed up route lookups even further. From Alexander Duyck. 9) Many improvements and bug fixes to the rhashtable implementation, from Herbert Xu and Thomas Graf. In particular, in the case where an rhashtable user bulk adds a large number of items into an empty table, we expand the table much more sanely. 10) Don't make the tcp_metrics hash table per-namespace, from Eric Biederman. 11) Extend EBPF to access SKB fields, from Alexei Starovoitov. 12) Split out new connection request sockets so that they can be established in the main hash table. Much less false sharing since hash lookups go direct to the request sockets instead of having to go first to the listener then to the request socks hashed underneath. From Eric Dumazet. 13) Add async I/O support for crytpo AF_ALG sockets, from Tadeusz Struk. 14) Support stable privacy address generation for RFC7217 in IPV6. From Hannes Frederic Sowa. 15) Hash network namespace into IP frag IDs, also from Hannes Frederic Sowa. 16) Convert PTP get/set methods to use 64-bit time, from Richard Cochran. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits) fm10k: Bump driver version to 0.15.2 fm10k: corrected VF multicast update fm10k: mbx_update_max_size does not drop all oversized messages fm10k: reset head instead of calling update_max_size fm10k: renamed mbx_tx_dropped to mbx_tx_oversized fm10k: update xcast mode before synchronizing multicast addresses fm10k: start service timer on probe fm10k: fix function header comment fm10k: comment next_vf_mbx flow fm10k: don't handle mailbox events in iov_event path and always process mailbox fm10k: use separate workqueue for fm10k driver fm10k: Set PF queues to unlimited bandwidth during virtualization fm10k: expose tx_timeout_count as an ethtool stat fm10k: only increment tx_timeout_count in Tx hang path fm10k: remove extraneous "Reset interface" message fm10k: separate PF only stats so that VF does not display them fm10k: use hw->mac.max_queues for stats fm10k: only show actual queues, not the maximum in hardware fm10k: allow creation of VLAN on default vid fm10k: fix unused warnings ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c150
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/fib_rules.c25
-rw-r--r--net/core/filter.c460
-rw-r--r--net/core/link_watch.c4
-rw-r--r--net/core/neighbour.c112
-rw-r--r--net/core/net-sysfs.c125
-rw-r--r--net/core/net_namespace.c114
-rw-r--r--net/core/request_sock.c45
-rw-r--r--net/core/rtnetlink.c121
-rw-r--r--net/core/skbuff.c55
-rw-r--r--net/core/sock.c109
-rw-r--r--net/core/sock_diag.c37
-rw-r--r--net/core/sysctl_net_core.c2
15 files changed, 955 insertions, 408 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index df493d68330c..b80fb91bb3f7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (!chunk)
return 0;
- if (iov_iter_count(&msg->msg_iter) < chunk) {
+ if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
goto csum_error;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
diff --git a/net/core/dev.c b/net/core/dev.c
index 45109b70664e..af4a1b0adc10 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -660,6 +660,27 @@ __setup("netdev=", netdev_boot_setup);
*******************************************************************************/
/**
+ * dev_get_iflink - get 'iflink' value of a interface
+ * @dev: targeted interface
+ *
+ * Indicates the ifindex the interface is linked to.
+ * Physical interfaces have the same 'ifindex' and 'iflink' values.
+ */
+
+int dev_get_iflink(const struct net_device *dev)
+{
+ if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
+ return dev->netdev_ops->ndo_get_iflink(dev);
+
+ /* If dev->rtnl_link_ops is set, it's a virtual interface. */
+ if (dev->rtnl_link_ops)
+ return 0;
+
+ return dev->ifindex;
+}
+EXPORT_SYMBOL(dev_get_iflink);
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -1385,7 +1406,7 @@ static int __dev_close(struct net_device *dev)
return retval;
}
-static int dev_close_many(struct list_head *head)
+int dev_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1399,11 +1420,13 @@ static int dev_close_many(struct list_head *head)
list_for_each_entry_safe(dev, tmp, head, close_list) {
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_DOWN, dev);
- list_del_init(&dev->close_list);
+ if (unlink)
+ list_del_init(&dev->close_list);
}
return 0;
}
+EXPORT_SYMBOL(dev_close_many);
/**
* dev_close - shutdown an interface.
@@ -1420,7 +1443,7 @@ int dev_close(struct net_device *dev)
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- dev_close_many(&single);
+ dev_close_many(&single, true);
list_del(&single);
}
return 0;
@@ -1607,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+#ifdef CONFIG_NET_CLS_ACT
+static struct static_key ingress_needed __read_mostly;
+
+void net_inc_ingress_queue(void)
+{
+ static_key_slow_inc(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
+
+void net_dec_ingress_queue(void)
+{
+ static_key_slow_dec(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
+#endif
+
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
@@ -1694,6 +1733,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
skb_scrub_packet(skb, true);
+ skb->priority = 0;
skb->protocol = eth_type_trans(skb, dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -1737,7 +1777,8 @@ static inline int deliver_skb(struct sk_buff *skb,
static inline void deliver_ptype_list_skb(struct sk_buff *skb,
struct packet_type **pt,
- struct net_device *dev, __be16 type,
+ struct net_device *orig_dev,
+ __be16 type,
struct list_head *ptype_list)
{
struct packet_type *ptype, *pt_prev = *pt;
@@ -1746,7 +1787,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb,
if (ptype->type != type)
continue;
if (pt_prev)
- deliver_skb(skb, pt_prev, dev);
+ deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
*pt = pt_prev;
@@ -2559,12 +2600,26 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
return features;
}
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ return features;
+}
+EXPORT_SYMBOL(passthru_features_check);
+
+static netdev_features_t dflt_features_check(const struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ return vlan_features_check(skb, features);
+}
+
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
- __be16 protocol = skb->protocol;
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
@@ -2576,34 +2631,17 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (skb->encapsulation)
features &= dev->hw_enc_features;
- if (!skb_vlan_tag_present(skb)) {
- if (unlikely(protocol == htons(ETH_P_8021Q) ||
- protocol == htons(ETH_P_8021AD))) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else {
- goto finalize;
- }
- }
-
- features = netdev_intersect_features(features,
- dev->vlan_features |
- NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX);
-
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+ if (skb_vlan_tagged(skb))
features = netdev_intersect_features(features,
- NETIF_F_SG |
- NETIF_F_HIGHDMA |
- NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM |
+ dev->vlan_features |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
-finalize:
if (dev->netdev_ops->ndo_features_check)
features &= dev->netdev_ops->ndo_features_check(skb, dev,
features);
+ else
+ features &= dflt_features_check(skb, dev, features);
return harmonize_features(skb, features);
}
@@ -2857,7 +2895,7 @@ EXPORT_SYMBOL(xmit_recursion);
* dev_loopback_xmit - loop back @skb
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sk_buff *skb)
+int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -2995,11 +3033,11 @@ out:
return rc;
}
-int dev_queue_xmit(struct sk_buff *skb)
+int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_queue_xmit_sk);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3525,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
- goto out;
+ return skb;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
@@ -3539,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
return NULL;
}
-out:
- skb->tc_verd = 0;
return skb;
}
#endif
@@ -3676,12 +3712,15 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_CLS_ACT
- skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto unlock;
+ if (static_key_false(&ingress_needed)) {
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
+ goto unlock;
+ }
+
+ skb->tc_verd = 0;
ncls:
#endif
-
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -3831,13 +3870,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb(struct sk_buff *skb)
+int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_receive_skb_sk);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
@@ -5914,6 +5953,24 @@ int dev_get_phys_port_id(struct net_device *dev,
EXPORT_SYMBOL(dev_get_phys_port_id);
/**
+ * dev_get_phys_port_name - Get device physical port name
+ * @dev: device
+ * @name: port name
+ *
+ * Get device physical port name
+ */
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_get_phys_port_name)
+ return -EOPNOTSUPP;
+ return ops->ndo_get_phys_port_name(dev, name, len);
+}
+EXPORT_SYMBOL(dev_get_phys_port_name);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -5970,7 +6027,7 @@ static void rollback_registered_many(struct list_head *head)
/* If device is running, close it first. */
list_for_each_entry(dev, head, unreg_list)
list_add_tail(&dev->close_list, &close_head);
- dev_close_many(&close_head);
+ dev_close_many(&close_head, true);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
@@ -6297,8 +6354,6 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
- dev->iflink = -1;
-
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
@@ -6328,9 +6383,6 @@ int register_netdevice(struct net_device *dev)
else if (__dev_get_by_index(net, dev->ifindex))
goto err_uninit;
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
-
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
@@ -6843,8 +6895,6 @@ void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
- release_net(dev_net(dev));
-
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
kvfree(dev->_rx);
@@ -7045,12 +7095,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_net_set(dev, net);
/* If there is an ifindex conflict assign a new one */
- if (__dev_get_by_index(net, dev->ifindex)) {
- int iflink = (dev->iflink == dev->ifindex);
+ if (__dev_get_by_index(net, dev->ifindex))
dev->ifindex = dev_new_index(net);
- if (iflink)
- dev->iflink = dev->ifindex;
- }
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aa378ecef186..1d00b8922902 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -790,7 +790,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ops->get_rxfh_indir_size)
dev_indir_size = ops->get_rxfh_indir_size(dev);
if (ops->get_rxfh_key_size)
- dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+ dev_key_size = ops->get_rxfh_key_size(dev);
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e4fdc9dfb2c7..9a12668f7d62 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
- r->fr_net = hold_net(ops->fro_net);
+ r->fr_net = ops->fro_net;
r->suppress_prefixlen = -1;
r->suppress_ifgroup = -1;
@@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops)
if (ops->family == o->family)
goto errout;
- hold_net(net);
list_add_tail_rcu(&ops->list, &net->rules_ops);
err = 0;
errout:
@@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
}
}
-static void fib_rules_put_rcu(struct rcu_head *head)
-{
- struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
- struct net *net = ops->fro_net;
-
- release_net(net);
- kfree(ops);
-}
-
void fib_rules_unregister(struct fib_rules_ops *ops)
{
struct net *net = ops->fro_net;
@@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
spin_unlock(&net->rules_mod_lock);
fib_rules_cleanup_ops(ops);
- call_rcu(&ops->rcu, fib_rules_put_rcu);
+ kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
err = -ENOMEM;
goto errout;
}
- rule->fr_net = hold_net(net);
+ rule->fr_net = net;
if (tb[FRA_PRIORITY])
rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
return 0;
errout_free:
- release_net(rule->fr_net);
kfree(rule);
errout:
rules_ops_put(ops);
@@ -492,6 +481,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
goto errout;
}
+ if (ops->delete) {
+ err = ops->delete(rule);
+ if (err)
+ goto errout;
+ }
+
list_del_rcu(&rule->list);
if (rule->action == FR_ACT_GOTO) {
@@ -517,8 +512,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
- if (ops->delete)
- ops->delete(rule);
fib_rule_put(rule);
flush_route_cache(ops);
rules_ops_put(ops);
diff --git a/net/core/filter.c b/net/core/filter.c
index f6bdc2b1ba01..b669e75d2b36 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,10 +150,62 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return prandom_u32();
}
+static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (skb_field) {
+ case SKF_AD_MARK:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ break;
+
+ case SKF_AD_PKTTYPE:
+ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
+#endif
+ break;
+
+ case SKF_AD_QUEUE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, queue_mapping));
+ break;
+
+ case SKF_AD_VLAN_TAG:
+ case SKF_AD_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+ /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, vlan_tci));
+ if (skb_field == SKF_AD_VLAN_TAG) {
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
+ ~VLAN_TAG_PRESENT);
+ } else {
+ /* dst_reg >>= 12 */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
+ /* dst_reg &= 1 */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
+ }
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static bool convert_bpf_extensions(struct sock_filter *fp,
struct bpf_insn **insnp)
{
struct bpf_insn *insn = *insnp;
+ u32 cnt;
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
@@ -167,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
- *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
- PKT_TYPE_OFFSET());
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
-#ifdef __BIG_ENDIAN_BITFIELD
- insn++;
- *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
-#endif
+ cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_IFINDEX:
@@ -197,10 +244,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
- *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, mark));
+ cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
@@ -211,29 +256,30 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-
- *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, queue_mapping));
+ cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
+ cnt = convert_skb_access(SKF_AD_VLAN_TAG,
+ BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
+ break;
+
case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+ BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
+ break;
+
+ case SKF_AD_OFF + SKF_AD_VLAN_TPID:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
- /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
+ /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, vlan_tci));
- if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
- ~VLAN_TAG_PRESENT);
- } else {
- /* A >>= 12 */
- *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
- /* A &= 1 */
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
- }
+ offsetof(struct sk_buff, vlan_proto));
+ /* A = ntohs(A) [emitting a nop or swap16] */
+ *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
break;
case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
@@ -814,7 +860,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
static void __bpf_prog_release(struct bpf_prog *prog)
{
- if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+ if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
} else {
bpf_release_orig_filter(prog);
@@ -1019,6 +1065,32 @@ void bpf_prog_destroy(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
+{
+ struct sk_filter *fp, *old_fp;
+
+ fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+ if (!fp)
+ return -ENOMEM;
+
+ fp->prog = prog;
+ atomic_set(&fp->refcnt, 0);
+
+ if (!sk_filter_charge(sk, fp)) {
+ kfree(fp);
+ return -ENOMEM;
+ }
+
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
+ rcu_assign_pointer(sk->sk_filter, fp);
+
+ if (old_fp)
+ sk_filter_uncharge(sk, old_fp);
+
+ return 0;
+}
+
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
@@ -1031,7 +1103,6 @@ EXPORT_SYMBOL_GPL(bpf_prog_destroy);
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct sk_filter *fp, *old_fp;
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
struct bpf_prog *prog;
@@ -1068,36 +1139,20 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- fp = kmalloc(sizeof(*fp), GFP_KERNEL);
- if (!fp) {
+ err = __sk_attach_prog(prog, sk);
+ if (err < 0) {
__bpf_prog_release(prog);
- return -ENOMEM;
- }
- fp->prog = prog;
-
- atomic_set(&fp->refcnt, 0);
-
- if (!sk_filter_charge(sk, fp)) {
- __sk_filter_release(fp);
- return -ENOMEM;
+ return err;
}
- old_fp = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
- rcu_assign_pointer(sk->sk_filter, fp);
-
- if (old_fp)
- sk_filter_uncharge(sk, old_fp);
-
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
-#ifdef CONFIG_BPF_SYSCALL
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
- struct sk_filter *fp, *old_fp;
struct bpf_prog *prog;
+ int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1106,40 +1161,168 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
- /* valid fd, but invalid program type */
+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
return -EINVAL;
}
- fp = kmalloc(sizeof(*fp), GFP_KERNEL);
- if (!fp) {
+ err = __sk_attach_prog(prog, sk);
+ if (err < 0) {
bpf_prog_put(prog);
- return -ENOMEM;
+ return err;
}
- fp->prog = prog;
- atomic_set(&fp->refcnt, 0);
+ return 0;
+}
- if (!sk_filter_charge(sk, fp)) {
- __sk_filter_release(fp);
- return -ENOMEM;
+#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
+
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ unsigned int offset = (unsigned int) r2;
+ void *from = (void *) (long) r3;
+ unsigned int len = (unsigned int) r4;
+ char buf[16];
+ void *ptr;
+
+ /* bpf verifier guarantees that:
+ * 'from' pointer points to bpf program stack
+ * 'len' bytes of it were initialized
+ * 'len' > 0
+ * 'skb' is a valid pointer to 'struct sk_buff'
+ *
+ * so check for invalid 'offset' and too large 'len'
+ */
+ if (unlikely(offset > 0xffff || len > sizeof(buf)))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, len, buf);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ if (BPF_RECOMPUTE_CSUM(flags))
+ skb_postpull_rcsum(skb, ptr, len);
+
+ memcpy(ptr, from, len);
+
+ if (ptr == buf)
+ /* skb_store_bits cannot return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, len);
+
+ if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+ .func = bpf_skb_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE,
+ .arg5_type = ARG_ANYTHING,
+};
+
+#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
+#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
+
+static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ __sum16 sum, *ptr;
+
+ if (unlikely(offset > 0xffff))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ case 2:
+ csum_replace2(ptr, from, to);
+ break;
+ case 4:
+ csum_replace4(ptr, from, to);
+ break;
+ default:
+ return -EINVAL;
}
- old_fp = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
- rcu_assign_pointer(sk->sk_filter, fp);
+ if (ptr == &sum)
+ /* skb_store_bits guaranteed to not return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, sizeof(sum));
- if (old_fp)
- sk_filter_uncharge(sk, old_fp);
+ return 0;
+}
+
+const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+ .func = bpf_l3_csum_replace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ __sum16 sum, *ptr;
+
+ if (unlikely(offset > 0xffff))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ case 2:
+ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+ break;
+ case 4:
+ inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ptr == &sum)
+ /* skb_store_bits guaranteed to not return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, sizeof(sum));
return 0;
}
-/* allow socket filters to call
- * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
- */
-static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
+const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+ .func = bpf_l4_csum_replace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -1148,39 +1331,144 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_get_prandom_u32:
+ return &bpf_get_prandom_u32_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
default:
return NULL;
}
}
-static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
+static const struct bpf_func_proto *
+tc_cls_act_func_proto(enum bpf_func_id func_id)
{
- /* skb fields cannot be accessed yet */
- return false;
+ switch (func_id) {
+ case BPF_FUNC_skb_store_bytes:
+ return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_l3_csum_replace:
+ return &bpf_l3_csum_replace_proto;
+ case BPF_FUNC_l4_csum_replace:
+ return &bpf_l4_csum_replace_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
+static bool sk_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type)
+{
+ /* only read is allowed */
+ if (type != BPF_READ)
+ return false;
+
+ /* check bounds */
+ if (off < 0 || off >= sizeof(struct __sk_buff))
+ return false;
+
+ /* disallow misaligned access */
+ if (off % size != 0)
+ return false;
+
+ /* all __sk_buff fields are __u32 */
+ if (size != 4)
+ return false;
+
+ return true;
+}
+
+static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct __sk_buff, len):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, len));
+ break;
+
+ case offsetof(struct __sk_buff, protocol):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, protocol));
+ break;
+
+ case offsetof(struct __sk_buff, vlan_proto):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, vlan_proto));
+ break;
+
+ case offsetof(struct __sk_buff, priority):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ break;
+
+ case offsetof(struct __sk_buff, mark):
+ return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, pkt_type):
+ return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, queue_mapping):
+ return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, vlan_present):
+ return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+ dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, vlan_tci):
+ return convert_skb_access(SKF_AD_VLAN_TAG,
+ dst_reg, src_reg, insn);
+ }
+
+ return insn - insn_buf;
}
-static struct bpf_verifier_ops sock_filter_ops = {
- .get_func_proto = sock_filter_func_proto,
- .is_valid_access = sock_filter_is_valid_access,
+static const struct bpf_verifier_ops sk_filter_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
+static const struct bpf_verifier_ops tc_cls_act_ops = {
+ .get_func_proto = tc_cls_act_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
};
-static struct bpf_prog_type_list tl = {
- .ops = &sock_filter_ops,
+static struct bpf_prog_type_list sk_filter_type __read_mostly = {
+ .ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};
-static int __init register_sock_filter_ops(void)
+static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_CLS,
+};
+
+static struct bpf_prog_type_list sched_act_type __read_mostly = {
+ .ops = &tc_cls_act_ops,
+ .type = BPF_PROG_TYPE_SCHED_ACT,
+};
+
+static int __init register_sk_filter_ops(void)
{
- bpf_register_prog_type(&tl);
+ bpf_register_prog_type(&sk_filter_type);
+ bpf_register_prog_type(&sched_cls_type);
+ bpf_register_prog_type(&sched_act_type);
+
return 0;
}
-late_initcall(register_sock_filter_ops);
-#else
-int sk_attach_bpf(u32 ufd, struct sock *sk)
-{
- return -EOPNOTSUPP;
-}
-#endif
+late_initcall(register_sk_filter_ops);
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 49a9e3e06c08..982861607f88 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock);
static unsigned char default_operstate(const struct net_device *dev)
{
if (!netif_carrier_ok(dev))
- return (dev->ifindex != dev->iflink ?
+ return (dev->ifindex != dev_get_iflink(dev) ?
IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
if (netif_dormant(dev))
@@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (!netif_running(dev))
return false;
- if (dev->ifindex != dev->iflink)
+ if (dev->ifindex != dev_get_iflink(dev))
return true;
if (dev->priv_flags & IFF_TEAM_PORT)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 70fe9e10ac86..3de654256028 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
struct neighbour *n;
- int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- if (!atomic_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
+ n = __neigh_lookup_noref(tbl, pkey, dev);
+ if (n) {
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
+ NEIGH_CACHE_STAT_INC(tbl, hits);
}
rcu_read_unlock_bh();
@@ -601,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (!n)
goto out;
- write_pnet(&n->net, hold_net(net));
+ write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
if (dev)
@@ -610,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (tbl->pconstructor && tbl->pconstructor(n)) {
if (dev)
dev_put(dev);
- release_net(net);
kfree(n);
n = NULL;
goto out;
@@ -644,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
- release_net(pneigh_net(n));
kfree(n);
return 0;
}
@@ -667,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
- release_net(pneigh_net(n));
kfree(n);
continue;
}
@@ -830,10 +817,9 @@ out:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
- if (!(n->nud_state & NUD_PROBE))
- max_probes += NEIGH_VAR(p, MCAST_PROBES);
- return max_probes;
+ return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
+ (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
+ NEIGH_VAR(p, MCAST_PROBES));
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -1263,10 +1249,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
+static void neigh_hh_init(struct neighbour *n)
{
- struct net_device *dev = dst->dev;
- __be16 prot = dst->ops->protocol;
+ struct net_device *dev = n->dev;
+ __be16 prot = n->tbl->protocol;
struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
@@ -1280,43 +1266,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
write_unlock_bh(&n->lock);
}
-/* This function can be used in contexts, where only old dev_queue_xmit
- * worked, f.e. if you want to override normal output path (eql, shaper),
- * but resolution is not made yet.
- */
-
-int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
-
- __skb_pull(skb, skb_network_offset(skb));
-
- if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
- skb->len) < 0 &&
- dev_rebuild_header(skb))
- return 0;
-
- return dev_queue_xmit(skb);
-}
-EXPORT_SYMBOL(neigh_compat_output);
-
/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
int rc = 0;
- if (!dst)
- goto discard;
-
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
unsigned int seq;
if (dev->header_ops->cache && !neigh->hh.hh_len)
- neigh_hh_init(neigh, dst);
+ neigh_hh_init(neigh);
do {
__skb_pull(skb, skb_network_offset(skb));
@@ -1332,8 +1294,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
}
out:
return rc;
-discard:
- neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1464,11 +1424,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
dev_hold(dev);
p->dev = dev;
- write_pnet(&p->net, hold_net(net));
+ write_pnet(&p->net, net);
p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
- release_net(net);
dev_put(dev);
kfree(p);
return NULL;
@@ -1508,7 +1467,6 @@ EXPORT_SYMBOL(neigh_parms_release);
static void neigh_parms_destroy(struct neigh_parms *parms)
{
- release_net(neigh_parms_net(parms));
kfree(parms);
}
@@ -1783,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
NEIGH_VAR(parms, UCAST_PROBES)) ||
nla_put_u32(skb, NDTPA_MCAST_PROBES,
NEIGH_VAR(parms, MCAST_PROBES)) ||
+ nla_put_u32(skb, NDTPA_MCAST_REPROBES,
+ NEIGH_VAR(parms, MCAST_REPROBES)) ||
nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
@@ -1942,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_APP_PROBES] = { .type = NLA_U32 },
[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
[NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
+ [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
[NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
[NDTPA_GC_STALETIME] = { .type = NLA_U64 },
[NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
@@ -2042,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
NEIGH_VAR_SET(p, MCAST_PROBES,
nla_get_u32(tbp[i]));
break;
+ case NDTPA_MCAST_REPROBES:
+ NEIGH_VAR_SET(p, MCAST_REPROBES,
+ nla_get_u32(tbp[i]));
+ break;
case NDTPA_BASE_REACHABLE_TIME:
NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
nla_get_msecs(tbp[i]));
@@ -2427,6 +2392,40 @@ void __neigh_for_each_release(struct neigh_table *tbl,
}
EXPORT_SYMBOL(__neigh_for_each_release);
+int neigh_xmit(int index, struct net_device *dev,
+ const void *addr, struct sk_buff *skb)
+{
+ int err = -EAFNOSUPPORT;
+ if (likely(index < NEIGH_NR_TABLES)) {
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
+
+ tbl = neigh_tables[index];
+ if (!tbl)
+ goto out;
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ if (!neigh)
+ neigh = __neigh_create(tbl, addr, dev, false);
+ err = PTR_ERR(neigh);
+ if (IS_ERR(neigh))
+ goto out_kfree_skb;
+ err = neigh->output(neigh, skb);
+ }
+ else if (index == NEIGH_LINK_TABLE) {
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ addr, NULL, skb->len);
+ if (err < 0)
+ goto out_kfree_skb;
+ err = dev_queue_xmit(skb);
+ }
+out:
+ return err;
+out_kfree_skb:
+ kfree_skb(skb);
+ goto out;
+}
+EXPORT_SYMBOL(neigh_xmit);
+
#ifdef CONFIG_PROC_FS
static struct neighbour *neigh_get_first(struct seq_file *seq)
@@ -2994,6 +2993,7 @@ static struct neigh_sysctl_table {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f2aa73bfb0e4..4238d6da5c60 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -23,6 +23,7 @@
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
+#include <linux/of.h>
#include "net-sysfs.h"
@@ -108,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
-NETDEVICE_SHOW_RO(iflink, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
+static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *ndev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, dev_get_iflink(ndev));
+}
+static DEVICE_ATTR_RO(iflink);
+
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
return sprintf(buf, fmt_dec, dev->name_assign_type);
@@ -417,6 +426,28 @@ static ssize_t phys_port_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_port_id);
+static ssize_t phys_port_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ char name[IFNAMSIZ];
+
+ ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+ if (!ret)
+ ret = sprintf(buf, "%s\n", name);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_port_name);
+
static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -464,6 +495,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
&dev_attr_phys_port_id.attr,
+ &dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
NULL,
};
@@ -950,6 +982,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
return sprintf(buf, "%lu", trans_timeout);
}
+#ifdef CONFIG_XPS
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (queue == &dev->_tx[i])
+ break;
+
+ BUG_ON(i >= dev->num_tx_queues);
+
+ return i;
+}
+
+static ssize_t show_tx_maxrate(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", queue->tx_maxrate);
+}
+
+static ssize_t set_tx_maxrate(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct net_device *dev = queue->dev;
+ int err, index = get_netdev_queue_index(queue);
+ u32 rate = 0;
+
+ err = kstrtou32(buf, 10, &rate);
+ if (err < 0)
+ return err;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ err = -EOPNOTSUPP;
+ if (dev->netdev_ops->ndo_set_tx_maxrate)
+ err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
+
+ rtnl_unlock();
+ if (!err) {
+ queue->tx_maxrate = rate;
+ return len;
+ }
+ return err;
+}
+
+static struct netdev_queue_attribute queue_tx_maxrate =
+ __ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
+ show_tx_maxrate, set_tx_maxrate);
+#endif
+
static struct netdev_queue_attribute queue_trans_timeout =
__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
@@ -1064,18 +1150,6 @@ static struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-{
- struct net_device *dev = queue->dev;
- unsigned int i;
-
- i = queue - dev->_tx;
- BUG_ON(i >= dev->num_tx_queues);
-
- return i;
-}
-
-
static ssize_t show_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute, char *buf)
{
@@ -1152,6 +1226,7 @@ static struct attribute *netdev_queue_default_attrs[] = {
&queue_trans_timeout.attr,
#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+ &queue_tx_maxrate.attr,
#endif
NULL
};
@@ -1374,6 +1449,30 @@ static struct class net_class = {
.namespace = net_namespace,
};
+#ifdef CONFIG_OF_NET
+static int of_dev_node_match(struct device *dev, const void *data)
+{
+ int ret = 0;
+
+ if (dev->parent)
+ ret = dev->parent->of_node == data;
+
+ return ret == 0 ? dev->of_node == data : ret;
+}
+
+struct net_device *of_find_net_device_by_node(struct device_node *np)
+{
+ struct device *dev;
+
+ dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
+ if (!dev)
+ return NULL;
+
+ return to_net_dev(dev);
+}
+EXPORT_SYMBOL(of_find_net_device_by_node);
+#endif
+
/* Delete sysfs entries but hold kobject reference until after all
* netdev references are gone.
*/
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 70d3450588b2..a3abb719221f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id);
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
- int min = 0, max = 0;
+ int min = 0, max = 0, id;
ASSERT_RTNL();
@@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid)
max = reqid + 1;
}
- return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ if (id >= 0)
+ rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
+
+ return id;
}
/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -238,10 +244,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
-#ifdef NETNS_REFCNT_DEBUG
- atomic_set(&net->use_count, 0);
-#endif
-
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
@@ -296,13 +298,6 @@ out_free:
static void net_free(struct net *net)
{
-#ifdef NETNS_REFCNT_DEBUG
- if (unlikely(atomic_read(&net->use_count) != 0)) {
- pr_emerg("network namespace not free! Usage: %d\n",
- atomic_read(&net->use_count));
- return;
- }
-#endif
kfree(rcu_access_pointer(net->gen));
kmem_cache_free(net_cachep, net);
}
@@ -370,8 +365,10 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id = __peernet2id(tmp, net, false);
- if (id >= 0)
+ if (id >= 0) {
+ rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
idr_remove(&tmp->netns_ids, id);
+ }
}
idr_destroy(&net->netns_ids);
@@ -542,7 +539,8 @@ static int rtnl_net_get_size(void)
}
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, struct net *peer)
+ int cmd, struct net *net, struct net *peer,
+ int nsid)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
@@ -557,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- id = __peernet2id(net, peer, false);
- if (id < 0)
- id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nsid >= 0) {
+ id = nsid;
+ } else {
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ }
if (nla_put_s32(skb, NETNSA_NSID, id))
goto nla_put_failure;
@@ -576,8 +578,8 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
struct sk_buff *msg;
- int err = -ENOBUFS;
struct net *peer;
+ int err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy);
@@ -600,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
}
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_GETNSID, net, peer);
+ RTM_GETNSID, net, peer, -1);
if (err < 0)
goto err_out;
@@ -614,6 +616,75 @@ out:
return err;
}
+struct rtnl_net_dump_cb {
+ struct net *net;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+ int s_idx;
+};
+
+static int rtnl_net_dumpid_one(int id, void *peer, void *data)
+{
+ struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
+ int ret;
+
+ if (net_cb->idx < net_cb->s_idx)
+ goto cont;
+
+ ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
+ net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWNSID, net_cb->net, peer, id);
+ if (ret < 0)
+ return ret;
+
+cont:
+ net_cb->idx++;
+ return 0;
+}
+
+static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct rtnl_net_dump_cb net_cb = {
+ .net = net,
+ .skb = skb,
+ .cb = cb,
+ .idx = 0,
+ .s_idx = cb->args[0],
+ };
+
+ ASSERT_RTNL();
+
+ idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+
+ cb->args[0] = net_cb.idx;
+ return skb->len;
+}
+
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id)
+{
+ struct sk_buff *msg;
+ int err = -ENOMEM;
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+ if (err < 0)
+ goto err_out;
+
+ rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+ return;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ rtnl_set_sk_err(net, RTNLGRP_NSID, err);
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -648,7 +719,8 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
+ NULL);
return 0;
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 04db318e6218..87b22c0bc08c 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -58,14 +58,14 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
return -ENOMEM;
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
- rwlock_init(&queue->syn_wait_lock);
+ spin_lock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
lopt->nr_table_entries = nr_table_entries;
lopt->max_qlen_log = ilog2(nr_table_entries);
- write_lock_bh(&queue->syn_wait_lock);
+ spin_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
- write_unlock_bh(&queue->syn_wait_lock);
+ spin_unlock_bh(&queue->syn_wait_lock);
return 0;
}
@@ -81,10 +81,10 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(
{
struct listen_sock *lopt;
- write_lock_bh(&queue->syn_wait_lock);
+ spin_lock_bh(&queue->syn_wait_lock);
lopt = queue->listen_opt;
queue->listen_opt = NULL;
- write_unlock_bh(&queue->syn_wait_lock);
+ spin_unlock_bh(&queue->syn_wait_lock);
return lopt;
}
@@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
- if (lopt->qlen != 0) {
+ if (listen_sock_qlen(lopt) != 0) {
unsigned int i;
for (i = 0; i < lopt->nr_table_entries; i++) {
struct request_sock *req;
+ spin_lock_bh(&queue->syn_wait_lock);
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
- lopt->qlen--;
- reqsk_free(req);
+ atomic_inc(&lopt->qlen_dec);
+ if (del_timer(&req->rsk_timer))
+ reqsk_put(req);
+ reqsk_put(req);
}
+ spin_unlock_bh(&queue->syn_wait_lock);
}
}
- WARN_ON(lopt->qlen != 0);
+ if (WARN_ON(listen_sock_qlen(lopt) != 0))
+ pr_err("qlen %u\n", listen_sock_qlen(lopt));
kvfree(lopt);
}
@@ -153,24 +158,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
* case might also exist in tcp_v4_hnd_req() that will trigger this locking
* order.
*
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
* fastopenq->lock in this function.
*/
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset)
{
- struct sock *lsk = tcp_rsk(req)->listener;
- struct fastopen_queue *fastopenq =
- inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ struct sock *lsk = req->rsk_listener;
+ struct fastopen_queue *fastopenq;
+
+ fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
if (req->sk) /* the child socket hasn't been accepted yet */
goto out;
@@ -179,8 +182,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
* special RST handling below.
*/
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
- reqsk_free(req);
+ reqsk_put(req);
return;
}
/* Wait for 60secs before removing a req that has triggered RST.
@@ -190,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
*
* For more details see CoNext'11 "TCP Fast Open" paper.
*/
- req->expires = jiffies + 60*HZ;
+ req->rsk_timer.expires = jiffies + 60*HZ;
if (fastopenq->rskq_rst_head == NULL)
fastopenq->rskq_rst_head = req;
else
@@ -201,5 +203,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
fastopenq->qlen++;
out:
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7ebed55b5f7d..358d52a38533 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
- nla_total_size(sizeof(struct ifla_vf_link_state)));
+ nla_total_size(sizeof(struct ifla_vf_link_state)) +
+ nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
return size;
} else
return 0;
@@ -982,6 +983,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ char name[IFNAMSIZ];
+ int err;
+
+ err = dev_get_phys_port_name(dev, name, sizeof(name));
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
@@ -1037,8 +1056,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev->iflink &&
- nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
+ (dev->ifindex != dev_get_iflink(dev) &&
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
(upper_dev &&
nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
@@ -1072,6 +1091,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
+ if (rtnl_phys_port_name_fill(skb, dev))
+ goto nla_put_failure;
+
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
@@ -1111,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_rss_query_en vf_rss_query_en;
/*
* Not all SR-IOV capable drivers support the
- * spoofcheck query. Preset to -1 so the user
- * space tool can detect that the driver didn't
- * report anything.
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
*/
ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1131,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
- vf_linkstate.vf = ivi.vf;
+ vf_linkstate.vf =
+ vf_rss_query_en.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
@@ -1141,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -1155,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
&vf_spoofchk) ||
nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate))
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en))
goto nla_put_failure;
nla_nest_end(skb, vf);
}
@@ -1269,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
+ [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1479,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
ivl->link_state);
break;
}
+ case IFLA_VF_RSS_QUERY_EN: {
+ struct ifla_vf_rss_query_en *ivrssq_en;
+
+ ivrssq_en = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rss_query_en)
+ err = ops->ndo_set_vf_rss_query_en(dev,
+ ivrssq_en->vf,
+ ivrssq_en->setting);
+ break;
+ }
default:
err = -EINVAL;
break;
@@ -1815,6 +1856,42 @@ errout:
return err;
}
+static int rtnl_group_dellink(const struct net *net, int group)
+{
+ struct net_device *dev, *aux;
+ LIST_HEAD(list_kill);
+ bool found = false;
+
+ if (!group)
+ return -EPERM;
+
+ for_each_netdev(net, dev) {
+ if (dev->group == group) {
+ const struct rtnl_link_ops *ops;
+
+ found = true;
+ ops = dev->rtnl_link_ops;
+ if (!ops || !ops->dellink)
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (!found)
+ return -ENODEV;
+
+ for_each_netdev_safe(net, dev, aux) {
+ if (dev->group == group) {
+ const struct rtnl_link_ops *ops;
+
+ ops = dev->rtnl_link_ops;
+ ops->dellink(dev, &list_kill);
+ }
+ }
+ unregister_netdevice_many(&list_kill);
+
+ return 0;
+}
+
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -1838,6 +1915,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
dev = __dev_get_by_name(net, ifname);
+ else if (tb[IFLA_GROUP])
+ return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP]));
else
return -EINVAL;
@@ -1873,7 +1952,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
EXPORT_SYMBOL(rtnl_configure_link);
struct net_device *rtnl_create_link(struct net *net,
- char *ifname, unsigned char name_assign_type,
+ const char *ifname, unsigned char name_assign_type,
const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
int err;
@@ -2345,7 +2424,7 @@ EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
- u8 *addr, u32 pid, u32 seq,
+ u8 *addr, u16 vid, u32 pid, u32 seq,
int type, unsigned int flags,
int nlflags)
{
@@ -2367,6 +2446,9 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
+ if (vid)
+ if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2381,7 +2463,7 @@ static inline size_t rtnl_fdb_nlmsg_size(void)
return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
}
-static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
+static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2391,7 +2473,8 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
if (!skb)
goto errout;
- err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0);
+ err = nlmsg_populate_fdb_fill(skb, dev, addr, vid,
+ 0, 0, type, NTF_SELF, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -2526,7 +2609,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
nlh->nlmsg_flags);
if (!err) {
- rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2627,7 +2710,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
if (!err) {
- rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2652,7 +2735,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
if (*idx < cb->args[0])
goto skip;
- err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
+ err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
portid, seq,
RTM_NEWNEIGH, NTF_SELF,
NLM_F_MULTI);
@@ -2695,7 +2778,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
struct nlattr *tb[IFLA_MAX+1];
- struct net_device *bdev = NULL;
struct net_device *br_dev = NULL;
const struct net_device_ops *ops = NULL;
const struct net_device_ops *cops = NULL;
@@ -2719,7 +2801,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
return -ENODEV;
ops = br_dev->netdev_ops;
- bdev = br_dev;
}
for_each_netdev(net, dev) {
@@ -2732,7 +2813,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
cops = br_dev->netdev_ops;
}
- bdev = dev;
} else {
if (dev != br_dev &&
!(dev->priv_flags & IFF_BRIDGE_PORT))
@@ -2742,7 +2822,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
!(dev->priv_flags & IFF_EBRIDGE))
continue;
- bdev = br_dev;
cops = ops;
}
@@ -2804,8 +2883,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- (dev->ifindex != dev->iflink &&
- nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+ (dev->ifindex != dev_get_iflink(dev) &&
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
goto nla_put_failure;
br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e4ac97c8477..3b6e5830256e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
* @from: search offset
* @to: search limit
* @config: textsearch configuration
- * @state: uninitialized textsearch state variable
*
* Finds a pattern in the skb data according to the specified
* textsearch configuration. Use textsearch_next() to retrieve
@@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
* to the first occurrence or UINT_MAX if no match was found.
*/
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
- unsigned int to, struct ts_config *config,
- struct ts_state *state)
+ unsigned int to, struct ts_config *config)
{
+ struct ts_state state;
unsigned int ret;
config->get_next_block = skb_ts_get_next_block;
config->finish = skb_ts_finish;
- skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+ skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
- ret = textsearch_find(config, state);
+ ret = textsearch_find(config, &state);
return (ret <= to - from ? ret : UINT_MAX);
}
EXPORT_SYMBOL(skb_find_text);
@@ -3207,10 +3206,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
unsigned int offset = skb_gro_offset(skb);
unsigned int headlen = skb_headlen(skb);
- struct sk_buff *nskb, *lp, *p = *head;
unsigned int len = skb_gro_len(skb);
+ struct sk_buff *lp, *p = *head;
unsigned int delta_truesize;
- unsigned int headroom;
if (unlikely(p->len + len >= 65536))
return -E2BIG;
@@ -3277,48 +3275,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
- /* switch back to head shinfo */
- pinfo = skb_shinfo(p);
-
- if (pinfo->frag_list)
- goto merge;
- if (skb_gro_len(p) != pinfo->gso_size)
- return -E2BIG;
-
- headroom = skb_headroom(p);
- nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
- if (unlikely(!nskb))
- return -ENOMEM;
-
- __copy_skb_header(nskb, p);
- nskb->mac_len = p->mac_len;
-
- skb_reserve(nskb, headroom);
- __skb_put(nskb, skb_gro_offset(p));
-
- skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
- skb_set_network_header(nskb, skb_network_offset(p));
- skb_set_transport_header(nskb, skb_transport_offset(p));
-
- __skb_pull(p, skb_gro_offset(p));
- memcpy(skb_mac_header(nskb), skb_mac_header(p),
- p->data - skb_mac_header(p));
-
- skb_shinfo(nskb)->frag_list = p;
- skb_shinfo(nskb)->gso_size = pinfo->gso_size;
- pinfo->gso_size = 0;
- __skb_header_release(p);
- NAPI_GRO_CB(nskb)->last = p;
-
- nskb->data_len += p->len;
- nskb->truesize += p->truesize;
- nskb->len += p->len;
-
- *head = nskb;
- nskb->next = p->next;
- p->next = NULL;
-
- p = nskb;
merge:
delta_truesize = skb->truesize;
@@ -3796,7 +3752,6 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
-
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
* @skb: the skb to set
diff --git a/net/core/sock.c b/net/core/sock.c
index 71e3e5f1eaa0..e891bcf325ca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb_dst_force(skb);
spin_lock_irqsave(&list->lock, flags);
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
@@ -947,8 +947,6 @@ set_rcvbuf:
sk->sk_mark = val;
break;
- /* We implement the SO_SNDLOWAT etc to
- not be settable (1003.1g 5.3) */
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
break;
@@ -1253,6 +1251,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
default:
+ /* We implement the SO_SNDLOWAT etc to not be settable
+ * (1003.1g 7).
+ */
return -ENOPROTOOPT;
}
@@ -1473,9 +1474,8 @@ void sk_release_kernel(struct sock *sk)
return;
sock_hold(sk);
- sock_release(sk->sk_socket);
- release_net(sock_net(sk));
sock_net_set(sk, get_net(&init_net));
+ sock_release(sk->sk_socket);
sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);
@@ -1557,6 +1557,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_err = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
+ atomic64_set(&newsk->sk_cookie, 0);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1684,19 +1685,6 @@ void sock_efree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_efree);
-#ifdef CONFIG_INET
-void sock_edemux(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- if (sk->sk_state == TCP_TIME_WAIT)
- inet_twsk_put(inet_twsk(sk));
- else
- sock_put(sk);
-}
-EXPORT_SYMBOL(sock_edemux);
-#endif
-
kuid_t sock_i_uid(struct sock *sk)
{
kuid_t uid;
@@ -2186,15 +2174,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_no_getsockopt);
-int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t len)
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
-int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
- size_t len, int flags)
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
+ int flags)
{
return -EOPNOTSUPP;
}
@@ -2566,14 +2553,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
-int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size, int flags)
+int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
int addr_len = 0;
int err;
- err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
@@ -2750,6 +2737,42 @@ static inline void release_proto_idx(struct proto *prot)
}
#endif
+static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
+{
+ if (!rsk_prot)
+ return;
+ kfree(rsk_prot->slab_name);
+ rsk_prot->slab_name = NULL;
+ if (rsk_prot->slab) {
+ kmem_cache_destroy(rsk_prot->slab);
+ rsk_prot->slab = NULL;
+ }
+}
+
+static int req_prot_init(const struct proto *prot)
+{
+ struct request_sock_ops *rsk_prot = prot->rsk_prot;
+
+ if (!rsk_prot)
+ return 0;
+
+ rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
+ prot->name);
+ if (!rsk_prot->slab_name)
+ return -ENOMEM;
+
+ rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
+ rsk_prot->obj_size, 0,
+ 0, NULL);
+
+ if (!rsk_prot->slab) {
+ pr_crit("%s: Can't create request sock SLAB cache!\n",
+ prot->name);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
int proto_register(struct proto *prot, int alloc_slab)
{
if (alloc_slab) {
@@ -2763,21 +2786,8 @@ int proto_register(struct proto *prot, int alloc_slab)
goto out;
}
- if (prot->rsk_prot != NULL) {
- prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
- if (prot->rsk_prot->slab_name == NULL)
- goto out_free_sock_slab;
-
- prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
- prot->rsk_prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
-
- if (prot->rsk_prot->slab == NULL) {
- pr_crit("%s: Can't create request sock SLAB cache!\n",
- prot->name);
- goto out_free_request_sock_slab_name;
- }
- }
+ if (req_prot_init(prot))
+ goto out_free_request_sock_slab;
if (prot->twsk_prot != NULL) {
prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
@@ -2789,8 +2799,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
0,
- SLAB_HWCACHE_ALIGN |
- prot->slab_flags,
+ prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
@@ -2806,14 +2815,8 @@ int proto_register(struct proto *prot, int alloc_slab)
out_free_timewait_sock_slab_name:
kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
- if (prot->rsk_prot && prot->rsk_prot->slab) {
- kmem_cache_destroy(prot->rsk_prot->slab);
- prot->rsk_prot->slab = NULL;
- }
-out_free_request_sock_slab_name:
- if (prot->rsk_prot)
- kfree(prot->rsk_prot->slab_name);
-out_free_sock_slab:
+ req_prot_cleanup(prot->rsk_prot);
+
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
out:
@@ -2833,11 +2836,7 @@ void proto_unregister(struct proto *prot)
prot->slab = NULL;
}
- if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
- kmem_cache_destroy(prot->rsk_prot->slab);
- kfree(prot->rsk_prot->slab_name);
- prot->rsk_prot->slab = NULL;
- }
+ req_prot_cleanup(prot->rsk_prot);
if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
kmem_cache_destroy(prot->twsk_prot->twsk_slab);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index ad704c757bb4..74dddf84adcd 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
-int sock_diag_check_cookie(void *sk, __u32 *cookie)
+static u64 sock_gen_cookie(struct sock *sk)
{
- if ((cookie[0] != INET_DIAG_NOCOOKIE ||
- cookie[1] != INET_DIAG_NOCOOKIE) &&
- ((u32)(unsigned long)sk != cookie[0] ||
- (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
- return -ESTALE;
- else
+ while (1) {
+ u64 res = atomic64_read(&sk->sk_cookie);
+
+ if (res)
+ return res;
+ res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+ atomic64_cmpxchg(&sk->sk_cookie, 0, res);
+ }
+}
+
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
+{
+ u64 res;
+
+ if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
return 0;
+
+ res = sock_gen_cookie(sk);
+ if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
+ return -ESTALE;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
-void sock_diag_save_cookie(void *sk, __u32 *cookie)
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
{
- cookie[0] = (u32)(unsigned long)sk;
- cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+ u64 res = sock_gen_cookie(sk);
+
+ cookie[0] = (u32)res;
+ cookie[1] = (u32)(res >> 32);
}
EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8ce351ffceb1..95b6139d710c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,7 +24,6 @@
static int zero = 0;
static int one = 1;
-static int ushort_max = USHRT_MAX;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -403,7 +402,6 @@ static struct ctl_table netns_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.extra1 = &zero,
- .extra2 = &ushort_max,
.proc_handler = proc_dointvec_minmax
},
{ }