summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/9p/client.c21
-rw-r--r--net/9p/protocol.c3
-rw-r--r--net/appletalk/atalk_proc.c2
-rw-r--r--net/appletalk/ddp.c37
-rw-r--r--net/appletalk/sysctl_net_atalk.c5
-rw-r--r--net/atm/lec.c6
-rw-r--r--net/ax25/af_ax25.c11
-rw-r--r--net/ax25/ax25_dev.c2
-rw-r--r--net/ax25/ax25_ip.c4
-rw-r--r--net/ax25/ax25_route.c19
-rw-r--r--net/batman-adv/bat_v_elp.c5
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c16
-rw-r--r--net/batman-adv/distributed-arp-table.c4
-rw-r--r--net/batman-adv/hard-interface.c5
-rw-r--r--net/batman-adv/main.c1
-rw-r--r--net/batman-adv/multicast.c11
-rw-r--r--net/batman-adv/soft-interface.c4
-rw-r--r--net/batman-adv/translation-table.c32
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/bluetooth/af_bluetooth.c16
-rw-r--r--net/bluetooth/hci_conn.c8
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hci_event.c18
-rw-r--r--net/bluetooth/hci_request.c5
-rw-r--r--net/bluetooth/hci_request.h1
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hidp/sock.c1
-rw-r--r--net/bluetooth/l2cap_core.c83
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bluetooth/smp.c23
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/bridge/br_forward.c10
-rw-r--r--net/bridge/br_if.c13
-rw-r--r--net/bridge/br_input.c23
-rw-r--r--net/bridge/br_multicast.c13
-rw-r--r--net/bridge/br_netfilter_hooks.c8
-rw-r--r--net/bridge/br_netfilter_ipv6.c3
-rw-r--r--net/bridge/netfilter/ebtables.c149
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c1
-rw-r--r--net/can/bcm.c27
-rw-r--r--net/can/gw.c30
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/messenger.c20
-rw-r--r--net/ceph/mon_client.c9
-rw-r--r--net/compat.c15
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c37
-rw-r--r--net/core/ethtool.c63
-rw-r--r--net/core/failover.c6
-rw-r--r--net/core/filter.c42
-rw-r--r--net/core/gen_stats.c2
-rw-r--r--net/core/gro_cells.c23
-rw-r--r--net/core/lwt_bpf.c1
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/net-sysfs.c9
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/pktgen.c11
-rw-r--r--net/core/rtnetlink.c16
-rw-r--r--net/core/skbuff.c23
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/ccid.h4
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dsa/dsa.c11
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c17
-rw-r--r--net/hsr/hsr_device.c18
-rw-r--r--net/hsr/hsr_framereg.c12
-rw-r--r--net/hsr/hsr_framereg.h1
-rw-r--r--net/ieee802154/6lowpan/tx.c3
-rw-r--r--net/ipv4/cipso_ipv4.c20
-rw-r--r--net/ipv4/esp4.c22
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/fib_trie.c15
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/gre_demux.c17
-rw-r--r--net/ipv4/icmp.c7
-rw-r--r--net/ipv4/igmp.c47
-rw-r--r--net/ipv4/inet_diag.c14
-rw-r--r--net/ipv4/inet_fragment.c293
-rw-r--r--net/ipv4/inetpeer.c1
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c296
-rw-r--r--net/ipv4/ip_gre.c63
-rw-r--r--net/ipv4/ip_input.c17
-rw-r--r--net/ipv4/ip_options.c26
-rw-r--r--net/ipv4/ip_output.c1
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/ip_vti.c63
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c178
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c7
-rw-r--r--net/ipv4/netlink.c17
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c86
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_dctcp.c36
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/xfrm4_policy.c24
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/af_inet6.c17
-rw-r--r--net/ipv6/datagram.c11
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/icmp.c8
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_fib.c16
-rw-r--r--net/ipv6/ip6_flowlabel.c22
-rw-r--r--net/ipv6/ip6_gre.c93
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/ip6_udp_tunnel.c3
-rw-r--r--net/ipv6/ip6_vti.c9
-rw-r--r--net/ipv6/ip6mr.c36
-rw-r--r--net/ipv6/netfilter.c4
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c260
-rw-r--r--net/ipv6/output_core.c30
-rw-r--r--net/ipv6/raw.c27
-rw-r--r--net/ipv6/reassembly.c239
-rw-r--r--net/ipv6/route.c170
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/sit.c18
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/ipv6/udp.c24
-rw-r--r--net/ipv6/xfrm6_tunnel.c9
-rw-r--r--net/kcm/kcmsock.c16
-rw-r--r--net/key/af_key.c46
-rw-r--r--net/l2tp/l2tp_core.c19
-rw-r--r--net/l2tp/l2tp_core.h20
-rw-r--r--net/l2tp/l2tp_ip.c3
-rw-r--r--net/l2tp/l2tp_ip6.c7
-rw-r--r--net/llc/llc_output.c2
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/cfg.c10
-rw-r--r--net/mac80211/debugfs_netdev.c2
-rw-r--r--net/mac80211/driver-ops.h3
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/mac80211/key.c9
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mesh.h6
-rw-r--r--net/mac80211/mesh_pathtbl.c157
-rw-r--r--net/mac80211/mlme.c3
-rw-r--r--net/mac80211/rx.c18
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/trace_msg.h7
-rw-r--r--net/mac80211/tx.c19
-rw-r--r--net/mac80211/util.c6
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c10
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c2
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c23
-rw-r--r--net/netfilter/nf_conncount.c292
-rw-r--r--net/netfilter/nf_conntrack_core.c65
-rw-r--r--net/netfilter/nf_conntrack_netlink.c34
-rw-r--r--net/netfilter/nf_conntrack_proto.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c50
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c7
-rw-r--r--net/netfilter/nf_flow_table_core.c5
-rw-r--r--net/netfilter/nf_nat_core.c3
-rw-r--r--net/netfilter/nf_tables_api.c148
-rw-r--r--net/netfilter/nf_tables_core.c14
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c57
-rw-r--r--net/netfilter/nfnetlink_osf.c4
-rw-r--r--net/netfilter/nft_compat.c193
-rw-r--r--net/netfilter/nft_connlimit.c14
-rw-r--r--net/netfilter/nft_dynset.c22
-rw-r--r--net/netfilter/nft_flow_offload.c13
-rw-r--r--net/netfilter/nft_immediate.c6
-rw-r--r--net/netfilter/nft_lookup.c21
-rw-r--r--net/netfilter/nft_objref.c40
-rw-r--r--net/netfilter/nft_set_rbtree.c7
-rw-r--r--net/netfilter/x_tables.c2
-rw-r--r--net/netfilter/xt_TEE.c76
-rw-r--r--net/netfilter/xt_cgroup.c72
-rw-r--r--net/netfilter/xt_physdev.c9
-rw-r--r--net/netlabel/netlabel_kapi.c3
-rw-r--r--net/netlink/genetlink.c3
-rw-r--r--net/netrom/af_netrom.c91
-rw-r--r--net/netrom/nr_loopback.c2
-rw-r--r--net/netrom/nr_route.c2
-rw-r--r--net/netrom/nr_timer.c20
-rw-r--r--net/netrom/sysctl_net_netrom.c5
-rw-r--r--net/nfc/llcp_commands.c20
-rw-r--r--net/nfc/llcp_core.c24
-rw-r--r--net/nfc/nci/hci.c8
-rw-r--r--net/openvswitch/flow_netlink.c6
-rw-r--r--net/packet/af_packet.c56
-rw-r--r--net/phonet/pep.c32
-rw-r--r--net/rds/af_rds.c3
-rw-r--r--net/rds/bind.c8
-rw-r--r--net/rds/ib_fmr.c11
-rw-r--r--net/rds/ib_rdma.c13
-rw-r--r--net/rds/rdma.c63
-rw-r--r--net/rds/rds.h20
-rw-r--r--net/rds/send.c50
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/rose/rose_loopback.c27
-rw-r--r--net/rose/rose_route.c5
-rw-r--r--net/rose/rose_subr.c21
-rw-r--r--net/rxrpc/call_object.c32
-rw-r--r--net/rxrpc/conn_client.c24
-rw-r--r--net/rxrpc/input.c12
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/recvmsg.c3
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/act_ipt.c3
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_tunnel_key.c22
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_flower.c62
-rw-r--r--net/sched/cls_matchall.c5
-rw-r--r--net/sched/cls_tcindex.c80
-rw-r--r--net/sched/sch_cake.c57
-rw-r--r--net/sched/sch_generic.c13
-rw-r--r--net/sched/sch_netem.c10
-rw-r--r--net/sctp/diag.c1
-rw-r--r--net/sctp/ipv6.c7
-rw-r--r--net/sctp/offload.c1
-rw-r--r--net/sctp/protocol.c8
-rw-r--r--net/sctp/sm_make_chunk.c24
-rw-r--r--net/sctp/sm_sideeffect.c34
-rw-r--r--net/sctp/sm_statefuns.c35
-rw-r--r--net/sctp/socket.c17
-rw-r--r--net/sctp/stream.c84
-rw-r--r--net/smc/af_smc.c18
-rw-r--r--net/smc/smc.h10
-rw-r--r--net/socket.c83
-rw-r--r--net/strparser/strparser.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c49
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c8
-rw-r--r--net/sunrpc/cache.c13
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/rpcb_clnt.c8
-rw-r--r--net/sunrpc/svc.c11
-rw-r--r--net/sunrpc/svc_xprt.c5
-rw-r--r--net/sunrpc/svcsock.c24
-rw-r--r--net/sunrpc/xprt.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c105
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c13
-rw-r--r--net/sunrpc/xprtrdma/verbs.c9
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/tipc/bearer.c1
-rw-r--r--net/tipc/core.c32
-rw-r--r--net/tipc/name_table.c3
-rw-r--r--net/tipc/net.c5
-rw-r--r--net/tipc/netlink_compat.c70
-rw-r--r--net/tipc/node.c6
-rw-r--r--net/tipc/socket.c72
-rw-r--r--net/tipc/subscr.h5
-rw-r--r--net/tipc/topsrv.c17
-rw-r--r--net/tipc/udp_media.c9
-rw-r--r--net/tls/tls_device.c84
-rw-r--r--net/tls/tls_device_fallback.c16
-rw-r--r--net/tls/tls_main.c13
-rw-r--r--net/tls/tls_sw.c3
-rw-r--r--net/unix/af_unix.c57
-rw-r--r--net/unix/diag.c3
-rw-r--r--net/vmw_vsock/virtio_transport.c28
-rw-r--r--net/vmw_vsock/virtio_transport_common.c29
-rw-r--r--net/vmw_vsock/vmci_transport.c71
-rw-r--r--net/wireless/nl80211.c27
-rw-r--r--net/wireless/reg.c43
-rw-r--r--net/x25/af_x25.c26
-rw-r--r--net/xdp/xdp_umem.c19
-rw-r--r--net/xfrm/xfrm_input.c7
-rw-r--r--net/xfrm/xfrm_interface.c21
-rw-r--r--net/xfrm/xfrm_output.c1
-rw-r--r--net/xfrm/xfrm_policy.c11
-rw-r--r--net/xfrm/xfrm_state.c40
-rw-r--r--net/xfrm/xfrm_user.c35
289 files changed, 4492 insertions, 2800 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 546af0e73ac3..fce3b7eebffb 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -368,10 +368,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
+ case SIOCSHWTSTAMP:
+ if (!net_eq(dev_net(dev), &init_net))
+ break;
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCSHWTSTAMP:
case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
diff --git a/net/9p/client.c b/net/9p/client.c
index deae53a7dffc..23ec6187dc07 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -181,6 +181,12 @@ static int parse_opts(char *opts, struct p9_client *clnt)
ret = r;
continue;
}
+ if (option < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "msize should be at least 4k\n");
+ ret = -EINVAL;
+ continue;
+ }
clnt->msize = option;
break;
case Opt_trans:
@@ -993,10 +999,18 @@ static int p9_client_version(struct p9_client *c)
else if (!strncmp(version, "9P2000", 6))
c->proto_version = p9_proto_legacy;
else {
+ p9_debug(P9_DEBUG_ERROR,
+ "server returned an unknown version: %s\n", version);
err = -EREMOTEIO;
goto error;
}
+ if (msize < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "server returned a msize < 4096: %d\n", msize);
+ err = -EREMOTEIO;
+ goto error;
+ }
if (msize < c->msize)
c->msize = msize;
@@ -1055,6 +1069,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (clnt->msize > clnt->trans_mod->maxsize)
clnt->msize = clnt->trans_mod->maxsize;
+ if (clnt->msize < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "Please specify a msize of at least 4k\n");
+ err = -EINVAL;
+ goto close_trans;
+ }
+
err = p9_client_version(clnt);
if (err)
goto close_trans;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index ee32bbf12675..b4d80c533f89 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -571,9 +571,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)
if (ret) {
p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
trace_9p_protocol_dump(clnt, &fake_pdu);
+ return ret;
}
- return ret;
+ return fake_pdu.offset;
}
EXPORT_SYMBOL(p9stat_read);
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 8006295f8bd7..dda73991bb54 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -255,7 +255,7 @@ out_interface:
goto out;
}
-void __exit atalk_proc_exit(void)
+void atalk_proc_exit(void)
{
remove_proc_entry("interface", atalk_proc_dir);
remove_proc_entry("route", atalk_proc_dir);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5abe946..795fbc6c06aa 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1910,12 +1910,16 @@ static const char atalk_err_snap[] __initconst =
/* Called by proto.c on kernel start up */
static int __init atalk_init(void)
{
- int rc = proto_register(&ddp_proto, 0);
+ int rc;
- if (rc != 0)
+ rc = proto_register(&ddp_proto, 0);
+ if (rc)
goto out;
- (void)sock_register(&atalk_family_ops);
+ rc = sock_register(&atalk_family_ops);
+ if (rc)
+ goto out_proto;
+
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
if (!ddp_dl)
printk(atalk_err_snap);
@@ -1923,12 +1927,33 @@ static int __init atalk_init(void)
dev_add_pack(&ltalk_packet_type);
dev_add_pack(&ppptalk_packet_type);
- register_netdevice_notifier(&ddp_notifier);
+ rc = register_netdevice_notifier(&ddp_notifier);
+ if (rc)
+ goto out_sock;
+
aarp_proto_init();
- atalk_proc_init();
- atalk_register_sysctl();
+ rc = atalk_proc_init();
+ if (rc)
+ goto out_aarp;
+
+ rc = atalk_register_sysctl();
+ if (rc)
+ goto out_proc;
out:
return rc;
+out_proc:
+ atalk_proc_exit();
+out_aarp:
+ aarp_cleanup_module();
+ unregister_netdevice_notifier(&ddp_notifier);
+out_sock:
+ dev_remove_pack(&ppptalk_packet_type);
+ dev_remove_pack(&ltalk_packet_type);
+ unregister_snap_client(ddp_dl);
+ sock_unregister(PF_APPLETALK);
+out_proto:
+ proto_unregister(&ddp_proto);
+ goto out;
}
module_init(atalk_init);
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index c744a853fa5f..d945b7c0176d 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -45,9 +45,12 @@ static struct ctl_table atalk_table[] = {
static struct ctl_table_header *atalk_table_header;
-void atalk_register_sysctl(void)
+int __init atalk_register_sysctl(void)
{
atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table);
+ if (!atalk_table_header)
+ return -ENOMEM;
+ return 0;
}
void atalk_unregister_sysctl(void)
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d7f5cf5b7594..ad4f829193f0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
- if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+ if (arg < 0 || arg >= MAX_LEC_ITF)
+ return -EINVAL;
+ arg = array_index_nospec(arg, MAX_LEC_ITF);
+ if (!dev_lec[arg])
return -EINVAL;
vcc->proto_data = dev_lec[arg];
return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
@@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
i = arg;
if (arg >= MAX_LEC_ITF)
return -EINVAL;
+ i = array_index_nospec(arg, MAX_LEC_ITF);
if (!dev_lec[i]) {
int size;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c603d33d5410..5d01edf8d819 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -653,15 +653,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
}
- dev = dev_get_by_name(&init_net, devname);
+ rtnl_lock();
+ dev = __dev_get_by_name(&init_net, devname);
if (!dev) {
+ rtnl_unlock();
res = -ENODEV;
break;
}
ax25->ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25->ax25_dev) {
+ rtnl_unlock();
+ res = -ENODEV;
+ break;
+ }
ax25_fillin_cb(ax25, ax25->ax25_dev);
- dev_put(dev);
+ rtnl_unlock();
break;
default:
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 9a3a301e1e2f..d92195cd7834 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ dev->ax25_ptr = NULL;
dev_put(dev);
kfree(ax25_dev);
return;
@@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev)
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ dev->ax25_ptr = NULL;
dev_put(dev);
kfree(ax25_dev);
return;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 70417e9b932d..314bbc8010fb 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -114,6 +114,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
dst = (ax25_address *)(bp + 1);
src = (ax25_address *)(bp + 8);
+ ax25_route_lock_use();
route = ax25_get_route(dst, NULL);
if (route) {
digipeat = route->digipeat;
@@ -206,9 +207,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
ax25_queue_xmit(skb, dev);
put:
- if (route)
- ax25_put_route(route);
+ ax25_route_lock_unuse();
return NETDEV_TX_OK;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index a0eff323af12..66f74c85cf6b 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -40,7 +40,7 @@
#include <linux/export.h>
static ax25_route *ax25_route_list;
-static DEFINE_RWLOCK(ax25_route_lock);
+DEFINE_RWLOCK(ax25_route_lock);
void ax25_rt_device_down(struct net_device *dev)
{
@@ -335,6 +335,7 @@ const struct seq_operations ax25_rt_seqops = {
* Find AX.25 route
*
* Only routes with a reference count of zero can be destroyed.
+ * Must be called with ax25_route_lock read locked.
*/
ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
{
@@ -342,7 +343,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
ax25_route *ax25_def_rt = NULL;
ax25_route *ax25_rt;
- read_lock(&ax25_route_lock);
/*
* Bind to the physical interface we heard them on, or the default
* route if none is found;
@@ -365,11 +365,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
if (ax25_spe_rt != NULL)
ax25_rt = ax25_spe_rt;
- if (ax25_rt != NULL)
- ax25_hold_route(ax25_rt);
-
- read_unlock(&ax25_route_lock);
-
return ax25_rt;
}
@@ -400,9 +395,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
ax25_route *ax25_rt;
int err = 0;
- if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
+ ax25_route_lock_use();
+ ax25_rt = ax25_get_route(addr, NULL);
+ if (!ax25_rt) {
+ ax25_route_lock_unuse();
return -EHOSTUNREACH;
-
+ }
if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
err = -EHOSTUNREACH;
goto put;
@@ -437,8 +435,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
}
put:
- ax25_put_route(ax25_rt);
-
+ ax25_route_lock_unuse();
return err;
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index e8090f099eb8..5da183b2f4c9 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -104,6 +104,11 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+ if (!ret) {
+ /* free the TID stats immediately */
+ cfg80211_sinfo_release_content(&sinfo);
+ }
+
dev_put(real_netdev);
if (ret == -ENOENT) {
/* Node is not associated anymore! It would be
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f1aeeded0e3..85faf25c2912 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
const u8 *mac, const unsigned short vid)
{
struct batadv_bla_claim search_claim, *claim;
+ struct batadv_bla_claim *claim_removed_entry;
+ struct hlist_node *claim_removed_node;
ether_addr_copy(search_claim.addr, mac);
search_claim.vid = vid;
@@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__,
mac, batadv_print_vid(vid));
- batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
- batadv_choose_claim, claim);
- batadv_claim_put(claim); /* reference from the hash is gone */
+ claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash,
+ batadv_compare_claim,
+ batadv_choose_claim, claim);
+ if (!claim_removed_node)
+ goto free_claim;
+ /* reference from the hash is gone */
+ claim_removed_entry = hlist_entry(claim_removed_node,
+ struct batadv_bla_claim, hash_entry);
+ batadv_claim_put(claim_removed_entry);
+
+free_claim:
/* don't need the reference from hash_find() anymore */
batadv_claim_put(claim);
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a60bacf7120b..2895e3b26e93 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1394,7 +1394,6 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
hw_src, &ip_src, hw_dst, &ip_dst,
dat_entry->mac_addr, &dat_entry->ip);
dropped = true;
- goto out;
}
/* Update our internal cache with both the IP addresses the node got
@@ -1403,6 +1402,9 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
+ if (dropped)
+ goto out;
+
/* If BLA is enabled, only forward ARP replies if we have claimed the
* source of the ARP reply or if no one else of the same backbone has
* already claimed that client. This prevents that different gateways
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 2f0d42f2f913..08690d06b7be 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -20,7 +20,6 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/gfp.h>
@@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
parent_dev = __dev_get_by_index((struct net *)parent_net,
dev_get_iflink(net_dev));
/* if we got a NULL parent_dev there is something broken.. */
- if (WARN(!parent_dev, "Cannot find parent device"))
+ if (!parent_dev) {
+ pr_err("Cannot find parent device\n");
return false;
+ }
if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
return false;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 69c0d85bceb3..79b8a2d8793e 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -160,6 +160,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tt.commit_lock);
spin_lock_init(&bat_priv->gw.list_lock);
#ifdef CONFIG_BATMAN_ADV_MCAST
+ spin_lock_init(&bat_priv->mcast.mla_lock);
spin_lock_init(&bat_priv->mcast.want_lists_lock);
#endif
spin_lock_init(&bat_priv->tvlv.container_list_lock);
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 86725d792e15..b90fe25d6b0b 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -325,8 +325,6 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
* translation table except the ones listed in the given mcast_list.
*
* If mcast_list is NULL then all are retracted.
- *
- * Do not call outside of the mcast worker! (or cancel mcast worker first)
*/
static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
struct hlist_head *mcast_list)
@@ -334,8 +332,6 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
- WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
-
hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
list) {
if (mcast_list &&
@@ -359,8 +355,6 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
*
* Adds multicast listener announcements from the given mcast_list to the
* translation table if they have not been added yet.
- *
- * Do not call outside of the mcast worker! (or cancel mcast worker first)
*/
static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
struct hlist_head *mcast_list)
@@ -368,8 +362,6 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
- WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
-
if (!mcast_list)
return;
@@ -658,7 +650,10 @@ static void batadv_mcast_mla_update(struct work_struct *work)
priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work);
bat_priv = container_of(priv_mcast, struct batadv_priv, mcast);
+ spin_lock(&bat_priv->mcast.mla_lock);
__batadv_mcast_mla_update(bat_priv);
+ spin_unlock(&bat_priv->mcast.mla_lock);
+
batadv_mcast_start_timer(bat_priv);
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 626ddca332db..a2976adeeedc 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -221,10 +221,14 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
+
+ skb_reset_mac_header(skb);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, sizeof(*vhdr)))
+ goto dropped;
vhdr = vlan_eth_hdr(skb);
/* drop batman-in-batman packets to prevent loops */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d21624c44665..359ec1a6e822 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global,
const char *message)
{
+ struct batadv_tt_global_entry *tt_removed_entry;
+ struct hlist_node *tt_removed_node;
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
batadv_print_vid(tt_global->common.vid), message);
- batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
- batadv_choose_tt, &tt_global->common);
- batadv_tt_global_entry_put(tt_global);
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash,
+ batadv_compare_tt,
+ batadv_choose_tt,
+ &tt_global->common);
+ if (!tt_removed_node)
+ return;
+
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_global_entry,
+ common.hash_entry);
+ batadv_tt_global_entry_put(tt_removed_entry);
}
/**
@@ -1332,9 +1344,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid, const char *message,
bool roaming)
{
+ struct batadv_tt_local_entry *tt_removed_entry;
struct batadv_tt_local_entry *tt_local_entry;
u16 flags, curr_flags = BATADV_NO_FLAGS;
- void *tt_entry_exists;
+ struct hlist_node *tt_removed_node;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1363,15 +1376,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
*/
batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
- tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash,
batadv_compare_tt,
batadv_choose_tt,
&tt_local_entry->common);
- if (!tt_entry_exists)
+ if (!tt_removed_node)
goto out;
- /* extra call to free the local tt entry */
- batadv_tt_local_entry_put(tt_local_entry);
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_local_entry,
+ common.hash_entry);
+ batadv_tt_local_entry_put(tt_removed_entry);
out:
if (tt_local_entry)
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 343d304851a5..eeee3e61c625 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1216,6 +1216,11 @@ struct batadv_priv_mcast {
unsigned char bridged:1;
/**
+ * @mla_lock: a lock protecting mla_list and mla_flags
+ */
+ spinlock_t mla_lock;
+
+ /**
* @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP
* traffic
*/
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index deacc52d7ff1..8d12198eaa94 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
}
EXPORT_SYMBOL(bt_sock_unlink);
-void bt_accept_enqueue(struct sock *parent, struct sock *sk)
+void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+ if (bh)
+ bh_lock_sock_nested(sk);
+ else
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
- release_sock(sk);
+
+ if (bh)
+ bh_unlock_sock(sk);
+ else
+ release_sock(sk);
+
parent->sk_ack_backlog++;
}
EXPORT_SYMBOL(bt_accept_enqueue);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index bd4978ce8c45..3cf0764d5793 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1276,6 +1276,14 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
+ /* The minimum encryption key size needs to be enforced by the
+ * host stack before establishing any L2CAP connections. The
+ * specification in theory allows a minimum of 1, but to align
+ * BR/EDR and LE transports, a minimum of 7 is chosen.
+ */
+ if (conn->enc_key_size < HCI_MIN_ENC_KEY_SIZE)
+ return 0;
+
return 1;
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 74b29c7d841c..5afd67ef797a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3084,6 +3084,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_max_tx_time = 0x0148;
hdev->le_max_rx_len = 0x001b;
hdev->le_max_rx_time = 0x0148;
+ hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE;
hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
@@ -4272,6 +4274,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
return;
}
+ /* If we reach this point this event matches the last command sent */
+ hci_dev_clear_flag(hdev, HCI_CMD_PENDING);
+
/* If the command succeeded and there's still more commands in
* this request the request is not yet complete.
*/
@@ -4382,6 +4387,8 @@ static void hci_cmd_work(struct work_struct *work)
hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
if (hdev->sent_cmd) {
+ if (hci_req_status_pend(hdev))
+ hci_dev_set_flag(hdev, HCI_CMD_PENDING);
atomic_dec(&hdev->cmd_cnt);
hci_send_frame(hdev, skb);
if (test_bit(HCI_RESET, &hdev->flags))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f12555f23a49..3e7badb3ac2d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3357,6 +3357,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
req_complete_skb);
+ if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+ bt_dev_err(hdev,
+ "unexpected event for opcode 0x%4.4x", *opcode);
+ return;
+ }
+
if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
queue_work(hdev->workqueue, &hdev->cmd_work);
}
@@ -3464,6 +3470,12 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
req_complete_skb);
+ if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+ bt_dev_err(hdev,
+ "unexpected event for opcode 0x%4.4x", *opcode);
+ return;
+ }
+
if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
queue_work(hdev->workqueue, &hdev->cmd_work);
}
@@ -5668,6 +5680,12 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return true;
}
+ /* Check if request ended in Command Status - no way to retreive
+ * any extra parameters in this case.
+ */
+ if (hdr->evt == HCI_EV_CMD_STATUS)
+ return false;
+
if (hdr->evt != HCI_EV_CMD_COMPLETE) {
bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
hdr->evt);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e8c9ef1e1922..9448ebd3780a 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -46,6 +46,11 @@ void hci_req_purge(struct hci_request *req)
skb_queue_purge(&req->cmd_q);
}
+bool hci_req_status_pend(struct hci_dev *hdev)
+{
+ return hdev->req_status == HCI_REQ_PEND;
+}
+
static int req_run(struct hci_request *req, hci_req_complete_t complete,
hci_req_complete_skb_t complete_skb)
{
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 692cc8b13368..55b2050cc9ff 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -37,6 +37,7 @@ struct hci_request {
void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
void hci_req_purge(struct hci_request *req);
+bool hci_req_status_pend(struct hci_dev *hdev);
int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e1632394..d4e2a166ae17 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -831,8 +831,6 @@ static int hci_sock_release(struct socket *sock)
if (!sk)
return 0;
- hdev = hci_pi(sk)->hdev;
-
switch (hci_pi(sk)->channel) {
case HCI_CHANNEL_MONITOR:
atomic_dec(&monitor_promisc);
@@ -854,6 +852,7 @@ static int hci_sock_release(struct socket *sock)
bt_sock_unlink(&hci_sk_list, sk);
+ hdev = hci_pi(sk)->hdev;
if (hdev) {
if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
/* When releasing a user channel exclusive access,
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 1eaac01f85de..7f36fa73ffee 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -76,6 +76,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
sockfd_put(csock);
return err;
}
+ ca.name[sizeof(ca.name)-1] = 0;
err = hidp_connection_add(&ca, csock, isock);
if (!err && copy_to_user(argp, &ca, sizeof(ca)))
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index d17a4736e47c..2c6eabf294b3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3336,16 +3336,22 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
while (len >= L2CAP_CONF_OPT_SIZE) {
len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
+ if (len < 0)
+ break;
hint = type & L2CAP_CONF_HINT;
type &= L2CAP_CONF_MASK;
switch (type) {
case L2CAP_CONF_MTU:
+ if (olen != 2)
+ break;
mtu = val;
break;
case L2CAP_CONF_FLUSH_TO:
+ if (olen != 2)
+ break;
chan->flush_to = val;
break;
@@ -3353,26 +3359,30 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
break;
case L2CAP_CONF_RFC:
- if (olen == sizeof(rfc))
- memcpy(&rfc, (void *) val, olen);
+ if (olen != sizeof(rfc))
+ break;
+ memcpy(&rfc, (void *) val, olen);
break;
case L2CAP_CONF_FCS:
+ if (olen != 1)
+ break;
if (val == L2CAP_FCS_NONE)
set_bit(CONF_RECV_NO_FCS, &chan->conf_state);
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs)) {
- remote_efs = 1;
- memcpy(&efs, (void *) val, olen);
- }
+ if (olen != sizeof(efs))
+ break;
+ remote_efs = 1;
+ memcpy(&efs, (void *) val, olen);
break;
case L2CAP_CONF_EWS:
+ if (olen != 2)
+ break;
if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP))
return -ECONNREFUSED;
-
set_bit(FLAG_EXT_CTRL, &chan->flags);
set_bit(CONF_EWS_RECV, &chan->conf_state);
chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW;
@@ -3382,7 +3392,6 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
default:
if (hint)
break;
-
result = L2CAP_CONF_UNKNOWN;
*((u8 *) ptr++) = type;
break;
@@ -3547,58 +3556,65 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
while (len >= L2CAP_CONF_OPT_SIZE) {
len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+ if (len < 0)
+ break;
switch (type) {
case L2CAP_CONF_MTU:
+ if (olen != 2)
+ break;
if (val < L2CAP_DEFAULT_MIN_MTU) {
*result = L2CAP_CONF_UNACCEPT;
chan->imtu = L2CAP_DEFAULT_MIN_MTU;
} else
chan->imtu = val;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu,
+ endptr - ptr);
break;
case L2CAP_CONF_FLUSH_TO:
+ if (olen != 2)
+ break;
chan->flush_to = val;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
- 2, chan->flush_to, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2,
+ chan->flush_to, endptr - ptr);
break;
case L2CAP_CONF_RFC:
- if (olen == sizeof(rfc))
- memcpy(&rfc, (void *)val, olen);
-
+ if (olen != sizeof(rfc))
+ break;
+ memcpy(&rfc, (void *)val, olen);
if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) &&
rfc.mode != chan->mode)
return -ECONNREFUSED;
-
chan->fcs = 0;
-
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
- sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+ (unsigned long) &rfc, endptr - ptr);
break;
case L2CAP_CONF_EWS:
+ if (olen != 2)
+ break;
chan->ack_win = min_t(u16, val, chan->ack_win);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
chan->tx_win, endptr - ptr);
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs)) {
- memcpy(&efs, (void *)val, olen);
-
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
-
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
- }
+ if (olen != sizeof(efs))
+ break;
+ memcpy(&efs, (void *)val, olen);
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
break;
case L2CAP_CONF_FCS:
+ if (olen != 1)
+ break;
if (*result == L2CAP_CONF_PENDING)
if (val == L2CAP_FCS_NONE)
set_bit(CONF_RECV_NO_FCS,
@@ -3727,13 +3743,18 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
while (len >= L2CAP_CONF_OPT_SIZE) {
len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+ if (len < 0)
+ break;
switch (type) {
case L2CAP_CONF_RFC:
- if (olen == sizeof(rfc))
- memcpy(&rfc, (void *)val, olen);
+ if (olen != sizeof(rfc))
+ break;
+ memcpy(&rfc, (void *)val, olen);
break;
case L2CAP_CONF_EWS:
+ if (olen != 2)
+ break;
txwin_ext = val;
break;
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6b35b0..a3a2cd55e23a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1252,7 +1252,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
l2cap_sock_init(sk, parent);
- bt_accept_enqueue(parent, sk);
+ bt_accept_enqueue(parent, sk, false);
release_sock(parent);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e9212291..c044ff2f73e6 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
rfcomm_pi(sk)->channel = channel;
sk->sk_state = BT_CONFIG;
- bt_accept_enqueue(parent, sk);
+ bt_accept_enqueue(parent, sk, true);
/* Accept connection and return socket DLC */
*d = rfcomm_pi(sk)->dlc;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8f0f9279eac9..a4ca55df7390 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
conn->sk = sk;
if (parent)
- bt_accept_enqueue(parent, sk);
+ bt_accept_enqueue(parent, sk, true);
}
static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 73f7211d0431..a1c1b7e8a45c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -88,9 +88,6 @@ struct smp_dev {
u8 local_rand[16];
bool debug_key;
- u8 min_key_size;
- u8 max_key_size;
-
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
struct crypto_kpp *tfm_ecdh;
@@ -720,7 +717,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
if (rsp == NULL) {
req->io_capability = conn->hcon->io_capability;
req->oob_flag = oob_flag;
- req->max_key_size = SMP_DEV(hdev)->max_key_size;
+ req->max_key_size = hdev->le_max_key_size;
req->init_key_dist = local_dist;
req->resp_key_dist = remote_dist;
req->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -731,7 +728,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
rsp->io_capability = conn->hcon->io_capability;
rsp->oob_flag = oob_flag;
- rsp->max_key_size = SMP_DEV(hdev)->max_key_size;
+ rsp->max_key_size = hdev->le_max_key_size;
rsp->init_key_dist = req->init_key_dist & remote_dist;
rsp->resp_key_dist = req->resp_key_dist & local_dist;
rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -745,7 +742,7 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
struct hci_dev *hdev = conn->hcon->hdev;
struct smp_chan *smp = chan->data;
- if (max_key_size > SMP_DEV(hdev)->max_key_size ||
+ if (max_key_size > hdev->le_max_key_size ||
max_key_size < SMP_MIN_ENC_KEY_SIZE)
return SMP_ENC_KEY_SIZE;
@@ -3264,8 +3261,6 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
smp->tfm_ecdh = tfm_ecdh;
- smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
- smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
create_chan:
chan = l2cap_chan_create();
@@ -3391,7 +3386,7 @@ static ssize_t le_min_key_size_read(struct file *file,
struct hci_dev *hdev = file->private_data;
char buf[4];
- snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size);
+ snprintf(buf, sizeof(buf), "%2u\n", hdev->le_min_key_size);
return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
}
@@ -3412,11 +3407,11 @@ static ssize_t le_min_key_size_write(struct file *file,
sscanf(buf, "%hhu", &key_size);
- if (key_size > SMP_DEV(hdev)->max_key_size ||
+ if (key_size > hdev->le_max_key_size ||
key_size < SMP_MIN_ENC_KEY_SIZE)
return -EINVAL;
- SMP_DEV(hdev)->min_key_size = key_size;
+ hdev->le_min_key_size = key_size;
return count;
}
@@ -3435,7 +3430,7 @@ static ssize_t le_max_key_size_read(struct file *file,
struct hci_dev *hdev = file->private_data;
char buf[4];
- snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size);
+ snprintf(buf, sizeof(buf), "%2u\n", hdev->le_max_key_size);
return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
}
@@ -3457,10 +3452,10 @@ static ssize_t le_max_key_size_write(struct file *file,
sscanf(buf, "%hhu", &key_size);
if (key_size > SMP_MAX_ENC_KEY_SIZE ||
- key_size < SMP_DEV(hdev)->min_key_size)
+ key_size < hdev->le_min_key_size)
return -EINVAL;
- SMP_DEV(hdev)->max_key_size = key_size;
+ hdev->le_max_key_size = key_size;
return count;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 502f66349530..4d4b9b5ea1c1 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1088,6 +1088,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
err = -ENOMEM;
goto err_unlock;
}
+ if (swdev_notify)
+ fdb->added_by_user = 1;
fdb->added_by_external_learn = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
} else {
@@ -1107,6 +1109,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
modified = true;
}
+ if (swdev_notify)
+ fdb->added_by_user = 1;
+
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 5372e2042adf..48ddc60b4fbd 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -36,10 +36,10 @@ static inline int should_deliver(const struct net_bridge_port *p,
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb_push(skb, ETH_HLEN);
if (!is_skb_forwardable(skb->dev, skb))
goto drop;
- skb_push(skb, ETH_HLEN);
br_drop_fake_rtable(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
@@ -65,6 +65,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb->tstamp = 0;
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
@@ -97,12 +98,11 @@ static void __br_forward(const struct net_bridge_port *to,
net = dev_net(indev);
} else {
if (unlikely(netpoll_tx_running(to->br->dev))) {
- if (!is_skb_forwardable(skb->dev, skb)) {
+ skb_push(skb, ETH_HLEN);
+ if (!is_skb_forwardable(skb->dev, skb))
kfree_skb(skb);
- } else {
- skb_push(skb, ETH_HLEN);
+ else
br_netpoll_send_skb(to, skb);
- }
return;
}
br_hook = NF_BR_LOCAL_OUT;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0363f1bdc401..ed2b6002ae53 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -603,13 +603,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
call_netdevice_notifiers(NETDEV_JOIN, dev);
err = dev_set_allmulti(dev, 1);
- if (err)
- goto put_back;
+ if (err) {
+ kfree(p); /* kobject not yet init'd, manually free */
+ goto err1;
+ }
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
- goto err1;
+ goto err2;
err = br_sysfs_addif(p);
if (err)
@@ -692,12 +694,9 @@ err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
kobject_put(&p->kobj);
- p = NULL; /* kobject_put frees */
-err1:
dev_set_allmulti(dev, -1);
-put_back:
+err1:
dev_put(dev);
- kfree(p);
return err;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 72074276c088..fed0ff446abb 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -195,13 +195,10 @@ static void __br_handle_local_finish(struct sk_buff *skb)
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_bridge_port *p = br_port_get_rcu(skb->dev);
-
__br_handle_local_finish(skb);
- BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
- br_pass_frame_up(skb);
- return 0;
+ /* return 1 to signal the okfn() was called so it's ok to use the skb */
+ return 1;
}
/*
@@ -278,10 +275,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
- /* Deliver packet to local host only */
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
- NULL, skb, skb->dev, NULL, br_handle_local_finish);
- return RX_HANDLER_CONSUMED;
+ /* The else clause should be hit when nf_hook():
+ * - returns < 0 (drop/error)
+ * - returns = 0 (stolen/nf_queue)
+ * Thus return 1 from the okfn() to signal the skb is ok to pass
+ */
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ br_handle_local_finish) == 1) {
+ return RX_HANDLER_PASS;
+ } else {
+ return RX_HANDLER_CONSUMED;
+ }
}
forward:
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6dec8e9b3451..75901c4641b1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1420,14 +1420,7 @@ static void br_multicast_query_received(struct net_bridge *br,
return;
br_multicast_update_query_timer(br, query, max_delay);
-
- /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
- * the arrival port for IGMP Queries where the source address
- * is 0.0.0.0 should not be added to router port list.
- */
- if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
- saddr->proto == htons(ETH_P_IPV6))
- br_multicast_mark_router(br, port);
+ br_multicast_mark_router(br, port);
}
static void br_ip4_multicast_query(struct net_bridge *br,
@@ -2159,7 +2152,8 @@ static void br_multicast_start_querier(struct net_bridge *br,
__br_multicast_open(br, query);
- list_for_each_entry(port, &br->port_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(port, &br->port_list, list) {
if (port->state == BR_STATE_DISABLED ||
port->state == BR_STATE_BLOCKING)
continue;
@@ -2171,6 +2165,7 @@ static void br_multicast_start_querier(struct net_bridge *br,
br_multicast_enable(&port->ip6_own_query);
#endif
}
+ rcu_read_unlock();
}
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 37278dc280eb..212c184c1eee 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -278,7 +278,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
- if (neigh->hh.hh_len) {
+ if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
ret = br_handle_frame_finish(net, sk, skb);
@@ -515,6 +515,7 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IP);
+ skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
@@ -884,11 +885,6 @@ static const struct nf_br_ops br_ops = {
.br_dev_xmit_hook = br_nf_dev_xmit,
};
-void br_netfilter_enable(void)
-{
-}
-EXPORT_SYMBOL_GPL(br_netfilter_enable);
-
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
static const struct nf_hook_ops br_nf_ops[] = {
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 96c072e71ea2..09d5e0c7b3ba 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
IPSTATS_MIB_INDISCARDS);
goto drop;
}
+ hdr = ipv6_hdr(skb);
}
if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
goto drop;
@@ -234,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IPV6);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 491828713e0b..0bb4d712b80c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -31,10 +31,6 @@
/* needed for logical [in,out]-dev filtering */
#include "../br_private.h"
-#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
- "report to author: "format, ## args)
-/* #define BUGPRINT(format, args...) */
-
/* Each cpu has its own set of counters, so there is no need for write_lock in
* the softirq
* For reading or updating the counters, the user context needs to
@@ -466,8 +462,6 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
/* we make userspace set this right,
* so there is no misunderstanding
*/
- BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
- "in distinguisher\n");
return -EINVAL;
}
if (i != NF_BR_NUMHOOKS)
@@ -485,18 +479,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
offset += e->next_offset;
}
}
- if (offset != limit) {
- BUGPRINT("entries_size too small\n");
+ if (offset != limit)
return -EINVAL;
- }
/* check if all valid hooks have a chain */
for (i = 0; i < NF_BR_NUMHOOKS; i++) {
if (!newinfo->hook_entry[i] &&
- (valid_hooks & (1 << i))) {
- BUGPRINT("Valid hook without chain\n");
+ (valid_hooks & (1 << i)))
return -EINVAL;
- }
}
return 0;
}
@@ -523,26 +513,20 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
/* this checks if the previous chain has as many entries
* as it said it has
*/
- if (*n != *cnt) {
- BUGPRINT("nentries does not equal the nr of entries "
- "in the chain\n");
+ if (*n != *cnt)
return -EINVAL;
- }
+
if (((struct ebt_entries *)e)->policy != EBT_DROP &&
((struct ebt_entries *)e)->policy != EBT_ACCEPT) {
/* only RETURN from udc */
if (i != NF_BR_NUMHOOKS ||
- ((struct ebt_entries *)e)->policy != EBT_RETURN) {
- BUGPRINT("bad policy\n");
+ ((struct ebt_entries *)e)->policy != EBT_RETURN)
return -EINVAL;
- }
}
if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */
(*udc_cnt)++;
- if (((struct ebt_entries *)e)->counter_offset != *totalcnt) {
- BUGPRINT("counter_offset != totalcnt");
+ if (((struct ebt_entries *)e)->counter_offset != *totalcnt)
return -EINVAL;
- }
*n = ((struct ebt_entries *)e)->nentries;
*cnt = 0;
return 0;
@@ -550,15 +534,13 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
/* a plain old entry, heh */
if (sizeof(struct ebt_entry) > e->watchers_offset ||
e->watchers_offset > e->target_offset ||
- e->target_offset >= e->next_offset) {
- BUGPRINT("entry offsets not in right order\n");
+ e->target_offset >= e->next_offset)
return -EINVAL;
- }
+
/* this is not checked anywhere else */
- if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) {
- BUGPRINT("target size too small\n");
+ if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target))
return -EINVAL;
- }
+
(*cnt)++;
(*totalcnt)++;
return 0;
@@ -678,18 +660,15 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
if (e->bitmask == 0)
return 0;
- if (e->bitmask & ~EBT_F_MASK) {
- BUGPRINT("Unknown flag for bitmask\n");
+ if (e->bitmask & ~EBT_F_MASK)
return -EINVAL;
- }
- if (e->invflags & ~EBT_INV_MASK) {
- BUGPRINT("Unknown flag for inv bitmask\n");
+
+ if (e->invflags & ~EBT_INV_MASK)
return -EINVAL;
- }
- if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) {
- BUGPRINT("NOPROTO & 802_3 not allowed\n");
+
+ if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3))
return -EINVAL;
- }
+
/* what hook do we belong to? */
for (i = 0; i < NF_BR_NUMHOOKS; i++) {
if (!newinfo->hook_entry[i])
@@ -748,13 +727,11 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
t->u.target = target;
if (t->u.target == &ebt_standard_target) {
if (gap < sizeof(struct ebt_standard_target)) {
- BUGPRINT("Standard target size too big\n");
ret = -EFAULT;
goto cleanup_watchers;
}
if (((struct ebt_standard_target *)t)->verdict <
-NUM_STANDARD_TARGETS) {
- BUGPRINT("Invalid standard target\n");
ret = -EFAULT;
goto cleanup_watchers;
}
@@ -813,10 +790,9 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
if (strcmp(t->u.name, EBT_STANDARD_TARGET))
goto letscontinue;
if (e->target_offset + sizeof(struct ebt_standard_target) >
- e->next_offset) {
- BUGPRINT("Standard target size too big\n");
+ e->next_offset)
return -1;
- }
+
verdict = ((struct ebt_standard_target *)t)->verdict;
if (verdict >= 0) { /* jump to another chain */
struct ebt_entries *hlp2 =
@@ -825,14 +801,12 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
if (hlp2 == cl_s[i].cs.chaininfo)
break;
/* bad destination or loop */
- if (i == udc_cnt) {
- BUGPRINT("bad destination\n");
+ if (i == udc_cnt)
return -1;
- }
- if (cl_s[i].cs.n) {
- BUGPRINT("loop\n");
+
+ if (cl_s[i].cs.n)
return -1;
- }
+
if (cl_s[i].hookmask & (1 << hooknr))
goto letscontinue;
/* this can't be 0, so the loop test is correct */
@@ -865,24 +839,21 @@ static int translate_table(struct net *net, const char *name,
i = 0;
while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i])
i++;
- if (i == NF_BR_NUMHOOKS) {
- BUGPRINT("No valid hooks specified\n");
+ if (i == NF_BR_NUMHOOKS)
return -EINVAL;
- }
- if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) {
- BUGPRINT("Chains don't start at beginning\n");
+
+ if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries)
return -EINVAL;
- }
+
/* make sure chains are ordered after each other in same order
* as their corresponding hooks
*/
for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
if (!newinfo->hook_entry[j])
continue;
- if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) {
- BUGPRINT("Hook order must be followed\n");
+ if (newinfo->hook_entry[j] <= newinfo->hook_entry[i])
return -EINVAL;
- }
+
i = j;
}
@@ -900,15 +871,11 @@ static int translate_table(struct net *net, const char *name,
if (ret != 0)
return ret;
- if (i != j) {
- BUGPRINT("nentries does not equal the nr of entries in the "
- "(last) chain\n");
+ if (i != j)
return -EINVAL;
- }
- if (k != newinfo->nentries) {
- BUGPRINT("Total nentries is wrong\n");
+
+ if (k != newinfo->nentries)
return -EINVAL;
- }
/* get the location of the udc, put them in an array
* while we're at it, allocate the chainstack
@@ -942,7 +909,6 @@ static int translate_table(struct net *net, const char *name,
ebt_get_udc_positions, newinfo, &i, cl_s);
/* sanity check */
if (i != udc_cnt) {
- BUGPRINT("i != udc_cnt\n");
vfree(cl_s);
return -EFAULT;
}
@@ -1042,7 +1008,6 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_unlock;
if (repl->num_counters && repl->num_counters != t->private->nentries) {
- BUGPRINT("Wrong nr. of counters requested\n");
ret = -EINVAL;
goto free_unlock;
}
@@ -1118,15 +1083,12 @@ static int do_replace(struct net *net, const void __user *user,
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
- if (len != sizeof(tmp) + tmp.entries_size) {
- BUGPRINT("Wrong len argument\n");
+ if (len != sizeof(tmp) + tmp.entries_size)
return -EINVAL;
- }
- if (tmp.entries_size == 0) {
- BUGPRINT("Entries_size never zero\n");
+ if (tmp.entries_size == 0)
return -EINVAL;
- }
+
/* overflow check */
if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) /
NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter))
@@ -1137,21 +1099,22 @@ static int do_replace(struct net *net, const void __user *user,
tmp.name[sizeof(tmp.name) - 1] = 0;
countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
- newinfo = vmalloc(sizeof(*newinfo) + countersize);
+ newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT,
+ PAGE_KERNEL);
if (!newinfo)
return -ENOMEM;
if (countersize)
memset(newinfo->counters, 0, countersize);
- newinfo->entries = vmalloc(tmp.entries_size);
+ newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT,
+ PAGE_KERNEL);
if (!newinfo->entries) {
ret = -ENOMEM;
goto free_newinfo;
}
if (copy_from_user(
newinfo->entries, tmp.entries, tmp.entries_size) != 0) {
- BUGPRINT("Couldn't copy entries from userspace\n");
ret = -EFAULT;
goto free_entries;
}
@@ -1192,10 +1155,8 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (input_table == NULL || (repl = input_table->table) == NULL ||
repl->entries == NULL || repl->entries_size == 0 ||
- repl->counters != NULL || input_table->private != NULL) {
- BUGPRINT("Bad table data for ebt_register_table!!!\n");
+ repl->counters != NULL || input_table->private != NULL)
return -EINVAL;
- }
/* Don't add one table to multiple lists. */
table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL);
@@ -1233,13 +1194,10 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
((char *)repl->hook_entry[i] - repl->entries);
}
ret = translate_table(net, repl->name, newinfo);
- if (ret != 0) {
- BUGPRINT("Translate_table failed\n");
+ if (ret != 0)
goto free_chainstack;
- }
if (table->check && table->check(newinfo, table->valid_hooks)) {
- BUGPRINT("The table doesn't like its own initial data, lol\n");
ret = -EINVAL;
goto free_chainstack;
}
@@ -1250,7 +1208,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) {
if (strcmp(t->name, table->name) == 0) {
ret = -EEXIST;
- BUGPRINT("Table name already exists\n");
goto free_unlock;
}
}
@@ -1318,7 +1275,6 @@ static int do_update_counters(struct net *net, const char *name,
goto free_tmp;
if (num_counters != t->private->nentries) {
- BUGPRINT("Wrong nr of counters\n");
ret = -EINVAL;
goto unlock_mutex;
}
@@ -1445,10 +1401,8 @@ static int copy_counters_to_user(struct ebt_table *t,
if (num_counters == 0)
return 0;
- if (num_counters != nentries) {
- BUGPRINT("Num_counters wrong\n");
+ if (num_counters != nentries)
return -EINVAL;
- }
counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp)));
if (!counterstmp)
@@ -1494,15 +1448,11 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user,
(tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0))
return -EINVAL;
- if (tmp.nentries != nentries) {
- BUGPRINT("Nentries wrong\n");
+ if (tmp.nentries != nentries)
return -EINVAL;
- }
- if (tmp.entries_size != entries_size) {
- BUGPRINT("Wrong size\n");
+ if (tmp.entries_size != entries_size)
return -EINVAL;
- }
ret = copy_counters_to_user(t, oldcounters, tmp.counters,
tmp.num_counters, nentries);
@@ -1574,7 +1524,6 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
mutex_unlock(&ebt_mutex);
if (copy_to_user(user, &tmp, *len) != 0) {
- BUGPRINT("c2u Didn't work\n");
ret = -EFAULT;
break;
}
@@ -2083,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+ /* rule should have no remaining data after target */
+ if (type == EBT_COMPAT_TARGET && size_left)
return -EINVAL;
match32 = (struct compat_ebt_entry_mwt *) buf;
@@ -2291,9 +2241,12 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
- if (ret < 0)
- goto out_unlock;
+ if (tmp.nentries) {
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ if (ret < 0)
+ goto out_unlock;
+ }
+
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
if (ret < 0)
goto out_unlock;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 08cbed7d940e..419e8edf23ba 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
return false;
+ ip6h = ipv6_hdr(skb);
thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 0af8f0db892a..79bb8afa9c0c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -67,6 +67,9 @@
*/
#define MAX_NFRAMES 256
+/* limit timers to 400 days for sending/timeouts */
+#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60)
+
/* use of last_frames[index].flags */
#define RX_RECV 0x40 /* received data for this element */
#define RX_THR 0x80 /* element not been sent due to throttle feature */
@@ -140,6 +143,22 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
}
+/* check limitations for timeval provided by user */
+static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head)
+{
+ if ((msg_head->ival1.tv_sec < 0) ||
+ (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) ||
+ (msg_head->ival1.tv_usec < 0) ||
+ (msg_head->ival1.tv_usec >= USEC_PER_SEC) ||
+ (msg_head->ival2.tv_sec < 0) ||
+ (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) ||
+ (msg_head->ival2.tv_usec < 0) ||
+ (msg_head->ival2.tv_usec >= USEC_PER_SEC))
+ return true;
+
+ return false;
+}
+
#define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU)
#define OPSIZ sizeof(struct bcm_op)
#define MHSIZ sizeof(struct bcm_msg_head)
@@ -873,6 +892,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES)
return -EINVAL;
+ /* check timeval limitations */
+ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head))
+ return -EINVAL;
+
/* check the given can_id */
op = bcm_find_op(&bo->tx_ops, msg_head, ifindex);
if (op) {
@@ -1053,6 +1076,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
(!(msg_head->can_id & CAN_RTR_FLAG))))
return -EINVAL;
+ /* check timeval limitations */
+ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head))
+ return -EINVAL;
+
/* check the given can_id */
op = bcm_find_op(&bo->rx_ops, msg_head, ifindex);
if (op) {
diff --git a/net/can/gw.c b/net/can/gw.c
index faa3da88a127..53859346dc9a 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -416,13 +416,29 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
(*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
- /* check for checksum updates when the CAN frame has been modified */
+ /* Has the CAN frame been modified? */
if (modidx) {
- if (gwj->mod.csumfunc.crc8)
+ /* get available space for the processed CAN frame type */
+ int max_len = nskb->len - offsetof(struct can_frame, data);
+
+ /* dlc may have changed, make sure it fits to the CAN frame */
+ if (cf->can_dlc > max_len)
+ goto out_delete;
+
+ /* check for checksum updates in classic CAN length only */
+ if (gwj->mod.csumfunc.crc8) {
+ if (cf->can_dlc > 8)
+ goto out_delete;
+
(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+ }
+
+ if (gwj->mod.csumfunc.xor) {
+ if (cf->can_dlc > 8)
+ goto out_delete;
- if (gwj->mod.csumfunc.xor)
(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
+ }
}
/* clear the skb timestamp if not configured the other way */
@@ -434,6 +450,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
gwj->dropped_frames++;
else
gwj->handled_frames++;
+
+ return;
+
+ out_delete:
+ /* delete frame due to misconfiguration */
+ gwj->deleted_frames++;
+ kfree_skb(nskb);
+ return;
}
static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 87afb9ec4c68..20f0c3d7ed50 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -729,7 +729,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
}
EXPORT_SYMBOL(__ceph_open_session);
-
int ceph_open_session(struct ceph_client *client)
{
int ret;
@@ -745,6 +744,23 @@ int ceph_open_session(struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_open_session);
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+ unsigned long timeout)
+{
+ u64 newest_epoch;
+ int ret;
+
+ ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+ if (ret)
+ return ret;
+
+ if (client->osdc.osdmap->epoch >= newest_epoch)
+ return 0;
+
+ ceph_osdc_maybe_request_map(&client->osdc);
+ return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
+}
+EXPORT_SYMBOL(ceph_wait_for_latest_osdmap);
static int __init init_ceph_lib(void)
{
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9a1c27c61de8..f7d7f32ac673 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2091,6 +2091,8 @@ static int process_connect(struct ceph_connection *con)
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
if (con->auth) {
+ int len = le32_to_cpu(con->in_reply.authorizer_len);
+
/*
* Any connection that defines ->get_authorizer()
* should also define ->add_authorizer_challenge() and
@@ -2100,8 +2102,7 @@ static int process_connect(struct ceph_connection *con)
*/
if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
ret = con->ops->add_authorizer_challenge(
- con, con->auth->authorizer_reply_buf,
- le32_to_cpu(con->in_reply.authorizer_len));
+ con, con->auth->authorizer_reply_buf, len);
if (ret < 0)
return ret;
@@ -2111,10 +2112,12 @@ static int process_connect(struct ceph_connection *con)
return 0;
}
- ret = con->ops->verify_authorizer_reply(con);
- if (ret < 0) {
- con->error_msg = "bad authorize reply";
- return ret;
+ if (len) {
+ ret = con->ops->verify_authorizer_reply(con);
+ if (ret < 0) {
+ con->error_msg = "bad authorize reply";
+ return ret;
+ }
}
}
@@ -3240,9 +3243,10 @@ void ceph_con_keepalive(struct ceph_connection *con)
dout("con_keepalive %p\n", con);
mutex_lock(&con->mutex);
clear_standby(con);
+ con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING);
mutex_unlock(&con->mutex);
- if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 &&
- con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+
+ if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 18deb3d889c4..a53e4fbb6319 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
mutex_unlock(&monc->mutex);
ret = wait_generic_request(req);
+ if (!ret)
+ /*
+ * Make sure we have the osdmap that includes the blacklist
+ * entry. This is needed to ensure that the OSDs pick up the
+ * new blacklist before processing any future requests from
+ * this client.
+ */
+ ret = ceph_wait_for_latest_osdmap(monc->client, 0);
+
out:
put_generic_request(req);
return ret;
diff --git a/net/compat.c b/net/compat.c
index 3b2105f6549d..3c4b0283b29a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
ctv = (struct compat_timeval __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
+
if (tv.tv_sec == -1)
return err;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
err = 0;
if (put_user(tv.tv_sec, &ctv->tv_sec) ||
@@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
ctv = (struct compat_timespec __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return err;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- ts = ktime_to_timespec(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ ts = ktime_to_timespec(kt);
}
err = 0;
if (put_user(ts.tv_sec, &ctv->tv_sec) ||
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 57f3a6fcfc1e..a487df53a453 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (!skb_queue_empty(&sk->sk_receive_queue));
+ } while (sk->sk_receive_queue.prev != *last);
error = -EAGAIN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c0b8b894aa4..63b3058dd172 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1182,7 +1182,21 @@ int dev_change_name(struct net_device *dev, const char *newname)
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- if (dev->flags & IFF_UP)
+
+ /* Some auto-enslaved devices e.g. failover slaves are
+ * special, as userspace might rename the device after
+ * the interface had been brought up and running since
+ * the point kernel initiated auto-enslavement. Allow
+ * live name change even when these slave devices are
+ * up and running.
+ *
+ * Typically, users of these auto-enslaving devices
+ * don't actually care about slave name change, as
+ * they are supposed to operate on master interface
+ * directly.
+ */
+ if (dev->flags & IFF_UP &&
+ likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
return -EBUSY;
mutex_lock(&devnet_rename_mutex);
@@ -1813,7 +1827,7 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;
static void netstamp_clear(struct work_struct *work)
@@ -1832,7 +1846,7 @@ static DECLARE_WORK(netstamp_work, netstamp_clear);
void net_enable_timestamp(void)
{
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
int wanted;
while (1) {
@@ -1852,7 +1866,7 @@ EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
int wanted;
while (1) {
@@ -4975,8 +4989,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
if (pt_prev->list_func != NULL)
pt_prev->list_func(head, pt_prev, orig_dev);
else
- list_for_each_entry_safe(skb, next, head, list)
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ }
}
static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
@@ -5728,7 +5744,6 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
skb_reset_mac_header(skb);
skb_gro_reset_offset(skb);
- eth = skb_gro_header_fast(skb, 0);
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
if (unlikely(!eth)) {
@@ -5738,6 +5753,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
return NULL;
}
} else {
+ eth = (const struct ethhdr *)skb->data;
gro_pull_from_frag0(skb, hlen);
NAPI_GRO_CB(skb)->frag0 += hlen;
NAPI_GRO_CB(skb)->frag0_len -= hlen;
@@ -8073,7 +8089,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
netdev_features_t feature;
int feature_bit;
- for_each_netdev_feature(&upper_disables, feature_bit) {
+ for_each_netdev_feature(upper_disables, feature_bit) {
feature = __NETIF_F_BIT(feature_bit);
if (!(upper->wanted_features & feature)
&& (features & feature)) {
@@ -8093,7 +8109,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_features_t feature;
int feature_bit;
- for_each_netdev_feature(&upper_disables, feature_bit) {
+ for_each_netdev_feature(upper_disables, feature_bit) {
feature = __NETIF_F_BIT(feature_bit);
if (!(features & feature) && (lower->features & feature)) {
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
@@ -8633,6 +8649,9 @@ int init_dummy_netdev(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
set_bit(__LINK_STATE_START, &dev->state);
+ /* napi_busy_loop stats accounting wants this */
+ dev_net_set(dev, &init_net);
+
/* Note : We dont allocate pcpu_refcnt for dummy devices,
* because users of this 'device' dont need to change
* its refcount.
@@ -8731,7 +8750,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
refcnt = netdev_refcnt_read(dev);
- if (time_after(jiffies, warning_time + 10 * HZ)) {
+ if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
warning_time = jiffies;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aeabc4831fca..996813f345d5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -880,8 +880,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops->get_regs_len)
- info.regdump_len = ops->get_regs_len(dev);
+ if (ops->get_regs_len) {
+ int ret = ops->get_regs_len(dev);
+
+ if (ret > 0)
+ info.regdump_len = ret;
+ }
+
if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
@@ -1424,6 +1429,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
return -EFAULT;
reglen = ops->get_regs_len(dev);
+ if (reglen <= 0)
+ return reglen;
+
if (regs.len > reglen)
regs.len = reglen;
@@ -1434,13 +1442,16 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
return -ENOMEM;
}
+ if (regs.len < reglen)
+ reglen = regs.len;
+
ops->get_regs(dev, &regs, regbuf);
ret = -EFAULT;
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
- if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
+ if (copy_to_user(useraddr, regbuf, reglen))
goto out;
ret = 0;
@@ -1863,11 +1874,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
WARN_ON_ONCE(!ret);
gstrings.len = ret;
- data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
- if (gstrings.len && !data)
- return -ENOMEM;
- __ethtool_get_strings(dev, gstrings.string_set, data);
+ if (gstrings.len) {
+ data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
+ if (!data)
+ return -ENOMEM;
+
+ __ethtool_get_strings(dev, gstrings.string_set, data);
+ } else {
+ data = NULL;
+ }
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1963,11 +1979,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (n_stats && !data)
- return -ENOMEM;
- ops->get_ethtool_stats(dev, &stats, data);
+ if (n_stats) {
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!data)
+ return -ENOMEM;
+ ops->get_ethtool_stats(dev, &stats, data);
+ } else {
+ data = NULL;
+ }
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
@@ -2007,16 +2027,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (n_stats && !data)
- return -ENOMEM;
- if (dev->phydev && !ops->get_ethtool_phy_stats) {
- ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
- if (ret < 0)
- return ret;
+ if (n_stats) {
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!data)
+ return -ENOMEM;
+
+ if (dev->phydev && !ops->get_ethtool_phy_stats) {
+ ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+ if (ret < 0)
+ goto out;
+ } else {
+ ops->get_ethtool_phy_stats(dev, &stats, data);
+ }
} else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
+ data = NULL;
}
ret = -EFAULT;
diff --git a/net/core/failover.c b/net/core/failover.c
index 4a92a98ccce9..b5cd3c727285 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev)
goto err_upper_link;
}
- slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_register &&
!fops->slave_register(slave_dev, failover_dev))
return NOTIFY_OK;
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
err_upper_link:
netdev_rx_handler_unregister(slave_dev);
done:
@@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev)
netdev_rx_handler_unregister(slave_dev);
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_unregister &&
!fops->slave_unregister(slave_dev, failover_dev))
diff --git a/net/core/filter.c b/net/core/filter.c
index 86e2739dad96..2dd1f2eef4fa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2018,18 +2018,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
- /* skb->mac_len is not set on normal egress */
- unsigned int mlen = skb->network_header - skb->mac_header;
+ unsigned int mlen = skb_network_offset(skb);
- __skb_pull(skb, mlen);
+ if (mlen) {
+ __skb_pull(skb, mlen);
- /* At ingress, the mac header has already been pulled once.
- * At egress, skb_pospull_rcsum has to be done in case that
- * the skb is originated from ingress (i.e. a forwarded skb)
- * to ensure that rcsum starts at net header.
- */
- if (!skb_at_tc_ingress(skb))
- skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ }
skb_pop_mac_header(skb);
skb_reset_mac_len(skb);
return flags & BPF_F_INGRESS ?
@@ -2613,8 +2614,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2655,8 +2655,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2781,8 +2780,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_cow(skb, len_diff);
@@ -2811,8 +2809,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
- if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
return -ENOTSUPP;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -3908,10 +3905,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
+ val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
break;
case SO_SNDBUF:
+ val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
break;
@@ -3929,7 +3928,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
sk->sk_rcvlowat = val ? : 1;
break;
case SO_MARK:
- sk->sk_mark = val;
+ if (sk->sk_mark != val) {
+ sk->sk_mark = val;
+ sk_dst_reset(sk);
+ }
break;
default:
ret = -EINVAL;
@@ -4000,7 +4002,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
/* Only some options are supported */
switch (optname) {
case TCP_BPF_IW:
- if (val <= 0 || tp->data_segs_out > 0)
+ if (val <= 0 || tp->data_segs_out > tp->syn_data)
ret = -EINVAL;
else
tp->snd_cwnd = val;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index c48e46759e23..8bab88738691 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -256,7 +256,6 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
- qstats->qlen = 0;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
@@ -272,7 +271,6 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
} else {
- qstats->qlen = q->qlen;
qstats->backlog = q->backlog;
qstats->drops = q->drops;
qstats->requeues = q->requeues;
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 4b54e5f107c6..e095fb871d91 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct gro_cell *cell;
+ int res;
- if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
- return netif_rx(skb);
+ rcu_read_lock();
+ if (unlikely(!(dev->flags & IFF_UP)))
+ goto drop;
+
+ if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) {
+ res = netif_rx(skb);
+ goto unlock;
+ }
cell = this_cpu_ptr(gcells->cells);
if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+drop:
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
- return NET_RX_DROP;
+ res = NET_RX_DROP;
+ goto unlock;
}
__skb_queue_tail(&cell->napi_skbs, skb);
if (skb_queue_len(&cell->napi_skbs) == 1)
napi_schedule(&cell->napi);
- return NET_RX_SUCCESS;
+
+ res = NET_RX_SUCCESS;
+
+unlock:
+ rcu_read_unlock();
+ return res;
}
EXPORT_SYMBOL(gro_cells_receive);
@@ -84,6 +98,7 @@ void gro_cells_destroy(struct gro_cells *gcells)
for_each_possible_cpu(i) {
struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
+ napi_disable(&cell->napi);
netif_napi_del(&cell->napi);
__skb_queue_purge(&cell->napi_skbs);
}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 3e85437f7106..a648568c5e8f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
lwt->name ? : "<unknown>");
ret = BPF_OK;
} else {
+ skb_reset_mac_header(skb);
ret = skb_do_redirect(skb);
if (ret == 0)
ret = BPF_REDIRECT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4e07824eec5e..4e4ac77c6816 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -30,6 +30,7 @@
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
+#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
@@ -2536,7 +2537,13 @@ int neigh_xmit(int index, struct net_device *dev,
if (!tbl)
goto out;
rcu_read_lock_bh();
- neigh = __neigh_lookup_noref(tbl, addr, dev);
+ if (index == NEIGH_ARP_TABLE) {
+ u32 key = *((u32 *)addr);
+
+ neigh = __ipv4_neigh_lookup_noref(dev, key);
+ } else {
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ }
if (!neigh)
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bd67c4d0fcfd..bf9a3b6ac885 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -934,6 +934,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error) {
@@ -943,7 +945,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return error;
}
@@ -1472,6 +1473,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
if (error) {
@@ -1481,7 +1484,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
#endif
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return 0;
}
@@ -1547,6 +1549,9 @@ static int register_queue_kobjects(struct net_device *dev)
error:
netdev_queue_update_kobjects(dev, txq, 0);
net_rx_queue_update_kobjects(dev, rxq, 0);
+#ifdef CONFIG_SYSFS
+ kset_unregister(dev->queues_kset);
+#endif
return error;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 670c84b1bfc2..7320f0844a50 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->count, 1);
refcount_set(&net->passive, 1);
+ get_random_bytes(&net->hash_mix, sizeof(u32));
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b71d9eef334e..9d472d626aaa 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3065,7 +3065,13 @@ static int pktgen_wait_thread_run(struct pktgen_thread *t)
{
while (thread_is_running(t)) {
+ /* note: 't' will still be around even after the unlock/lock
+ * cycle because pktgen_thread threads are only cleared at
+ * net exit
+ */
+ mutex_unlock(&pktgen_thread_lock);
msleep_interruptible(100);
+ mutex_lock(&pktgen_thread_lock);
if (signal_pending(current))
goto signal;
@@ -3080,6 +3086,10 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
struct pktgen_thread *t;
int sig = 1;
+ /* prevent from racing with rmmod */
+ if (!try_module_get(THIS_MODULE))
+ return sig;
+
mutex_lock(&pktgen_thread_lock);
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
@@ -3093,6 +3103,7 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
t->control |= (T_STOP);
mutex_unlock(&pktgen_thread_lock);
+ module_put(THIS_MODULE);
return sig;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ebde98b565e9..3932eed379a4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1496,14 +1496,15 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
return ret;
}
-static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
+ bool force)
{
int ifindex = dev_get_iflink(dev);
- if (dev->ifindex == ifindex)
- return 0;
+ if (force || dev->ifindex != ifindex)
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
- return nla_put_u32(skb, IFLA_LINK, ifindex);
+ return 0;
}
static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
@@ -1520,6 +1521,8 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
const struct net_device *dev,
struct net *src_net)
{
+ bool put_iflink = false;
+
if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
@@ -1528,10 +1531,12 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
return -EMSGSIZE;
+
+ put_iflink = true;
}
}
- return 0;
+ return nla_put_iflink(skb, dev, put_iflink);
}
static int rtnl_fill_link_af(struct sk_buff *skb,
@@ -1617,7 +1622,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- nla_put_iflink(skb, dev) ||
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 001de1882f36..e20b1f25a273 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -356,6 +356,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
+ fragsz = SKB_DATA_ALIGN(fragsz);
+
return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
@@ -373,6 +375,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
void *napi_alloc_frag(unsigned int fragsz)
{
+ fragsz = SKB_DATA_ALIGN(fragsz);
+
return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
@@ -3843,7 +3847,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int delta_truesize;
struct sk_buff *lp;
- if (unlikely(p->len + len >= 65536))
+ if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
lp = NAPI_GRO_CB(p)->last;
@@ -5082,7 +5086,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
- int mac_len;
+ int mac_len, meta_len;
+ void *meta;
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
@@ -5094,6 +5099,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
mac_len - VLAN_HLEN - ETH_TLEN);
}
+
+ meta_len = skb_metadata_len(skb);
+ if (meta_len) {
+ meta = skb_metadata_end(skb) - meta_len;
+ memmove(meta + VLAN_HLEN, meta, meta_len);
+ }
+
skb->mac_header += VLAN_HLEN;
return skb;
}
@@ -5273,7 +5285,6 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
unsigned long chunk;
struct sk_buff *skb;
struct page *page;
- gfp_t gfp_head;
int i;
*errcode = -EMSGSIZE;
@@ -5283,12 +5294,8 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (npages > MAX_SKB_FRAGS)
return NULL;
- gfp_head = gfp_mask;
- if (gfp_head & __GFP_DIRECT_RECLAIM)
- gfp_head |= __GFP_RETRY_MAYFAIL;
-
*errcode = -ENOBUFS;
- skb = alloc_skb(header_len, gfp_head);
+ skb = alloc_skb(header_len, gfp_mask);
if (!skb)
return NULL;
diff --git a/net/core/sock.c b/net/core/sock.c
index 748765e35423..c9668dcb5eb9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -698,6 +698,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_DONTROUTE:
sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
+ sk_dst_reset(sk);
break;
case SO_BROADCAST:
sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -2803,6 +2804,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+#if BITS_PER_LONG==32
+ seqlock_init(&sk->sk_stamp_seq);
+#endif
atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -2902,12 +2906,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
struct timeval tv;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
if (tv.tv_sec == -1)
return -ENOENT;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
@@ -2918,11 +2923,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
struct timespec ts;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return -ENOENT;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
ts = ktime_to_timespec(sk->sk_stamp);
}
return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..37b4667128a3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
return ret;
}
-# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
-# endif
#endif
static struct ctl_table net_core_table[] = {
@@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = {
.extra2 = &one,
},
# endif
+ {
+ .procname = "bpf_jit_limit",
+ .data = &bpf_jit_limit,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &one,
+ },
#endif
{
.procname = "netdev_tstamp_prequeue",
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6eb837a47b5c..baaaeb2b2c42 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
u8 pkt, u8 opt, u8 *val, u8 len)
{
- if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+ if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)
return 0;
return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
}
@@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
u8 pkt, u8 opt, u8 *val, u8 len)
{
- if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+ if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)
return 0;
return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b18a6a..58a401e9cf09 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -433,8 +433,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
- newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 9f3209ff7ffd..601534a5bfe8 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -293,15 +293,22 @@ static int __init dsa_init_module(void)
rc = dsa_slave_register_notifier();
if (rc)
- return rc;
+ goto register_notifier_fail;
rc = dsa_legacy_register();
if (rc)
- return rc;
+ goto legacy_register_fail;
dev_add_pack(&dsa_pack_type);
return 0;
+
+legacy_register_fail:
+ dsa_slave_unregister_notifier();
+register_notifier_fail:
+ destroy_workqueue(dsa_owq);
+
+ return rc;
}
module_init(dsa_init_module);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c90ee3227dea..aae478d61101 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -158,6 +158,8 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
cpu_dp->orig_ethtool_ops = NULL;
}
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
/* If we use a tagging format that doesn't have an ethertype
@@ -167,6 +169,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
+ lockdep_set_class(&dev->addr_list_lock,
+ &dsa_master_addr_list_lock_key);
return dsa_master_ethtool_setup(dev);
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1c45c1d6d241..b39720d0995d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev)
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
struct net_device *master = dsa_slave_to_master(dev);
-
- if (change & IFF_ALLMULTI)
- dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
- dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1);
+ if (dev->flags & IFF_UP) {
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(master,
+ dev->flags & IFF_ALLMULTI ? 1 : -1);
+ if (change & IFF_PROMISC)
+ dev_set_promiscuity(master,
+ dev->flags & IFF_PROMISC ? 1 : -1);
+ }
}
static void dsa_slave_set_rx_mode(struct net_device *dev)
@@ -639,7 +642,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev && !dp->pl)
+ if (!dev->phydev || !dp->pl)
return -ENODEV;
if (!ds->ops->set_mac_eee)
@@ -659,7 +662,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev && !dp->pl)
+ if (!dev->phydev || !dp->pl)
return -ENODEV;
if (!ds->ops->get_mac_eee)
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b8cd43c9ed5b..a97bf326b231 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev,
&& (old_operstate != IF_OPER_UP)) {
/* Went up */
hsr->announce_count = 0;
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
- add_timer(&hsr->announce_timer);
+ mod_timer(&hsr->announce_timer,
+ jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
}
if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
@@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t)
{
struct hsr_priv *hsr;
struct hsr_port *master;
+ unsigned long interval;
hsr = from_timer(hsr, t, announce_timer);
@@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t)
hsr->protVersion);
hsr->announce_count++;
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+ interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
} else {
send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
hsr->protVersion);
- hsr->announce_timer.expires = jiffies +
- msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+ interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
}
if (is_admin_up(master->dev))
- add_timer(&hsr->announce_timer);
+ mod_timer(&hsr->announce_timer, jiffies + interval);
rcu_read_unlock();
}
@@ -486,7 +484,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
if (res)
- return res;
+ goto err_add_port;
res = register_netdevice(hsr_dev);
if (res)
@@ -506,6 +504,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
fail:
hsr_for_each_port(hsr, port)
hsr_del_port(port);
+err_add_port:
+ hsr_del_node(&hsr->self_node_db);
return res;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 286ceb41ac0c..9af16cb68f76 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db,
return 0;
}
+void hsr_del_node(struct list_head *self_node_db)
+{
+ struct hsr_node *node;
+
+ rcu_read_lock();
+ node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+ rcu_read_unlock();
+ if (node) {
+ list_del_rcu(&node->mac_list);
+ kfree(node);
+ }
+}
/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
* seq_out is used to initialize filtering of outgoing duplicate frames
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 370b45998121..531fd3dfcac1 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -16,6 +16,7 @@
struct hsr_node;
+void hsr_del_node(struct list_head *self_node_db);
struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
u16 seq_out);
struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index ca53efa17be1..8bec827081cd 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -48,6 +48,9 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct neighbour *n;
+ if (!daddr)
+ return -EINVAL;
+
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
*/
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b7fb13..f0165c5f376b 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
case CIPSO_V4_MAP_PASS:
return 0;
case CIPSO_V4_MAP_TRANS:
- if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+ if ((level < doi_def->map.std->lvl.cipso_size) &&
+ (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL))
return 0;
break;
}
@@ -1735,13 +1736,26 @@ validate_return:
*/
void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
+ unsigned char optbuf[sizeof(struct ip_options) + 40];
+ struct ip_options *opt = (struct ip_options *)optbuf;
+
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
+ /*
+ * We might be called above the IP layer,
+ * so we can not use icmp_send and IPCB here.
+ */
+
+ memset(opt, 0, sizeof(struct ip_options));
+ opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+ if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ return;
+
if (gateway)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
else
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
}
/**
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 97689012b357..114f9def1ec5 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -223,7 +223,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
int encap_type;
struct udphdr *uh;
@@ -231,6 +231,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
__be16 sport, dport;
struct xfrm_encap_tmpl *encap = x->encap;
struct ip_esp_hdr *esph = esp->esph;
+ unsigned int len;
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
@@ -238,11 +239,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
encap_type = encap->encap_type;
spin_unlock_bh(&x->lock);
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
+ return -EMSGSIZE;
+
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
- uh->len = htons(skb->len + esp->tailen
- - skb_transport_offset(skb));
+ uh->len = htons(len);
uh->check = 0;
switch (encap_type) {
@@ -259,6 +263,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
*skb_mac_header(skb) = IPPROTO_UDP;
esp->esph = esph;
+
+ return 0;
}
int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
@@ -272,8 +278,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */
- if (x->encap)
- esp_output_udp_encap(x, skb, esp);
+ if (x->encap) {
+ int err = esp_output_udp_encap(x, skb, esp);
+
+ if (err < 0)
+ return err;
+ }
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
@@ -325,7 +335,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
skb->len += tailen;
skb->data_len += tailen;
skb->truesize += tailen;
- if (sk)
+ if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0113993e9b2c..dae743b649c1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -203,7 +203,7 @@ static void fib_flush(struct net *net)
struct fib_table *tb;
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
- flushed += fib_table_flush(net, tb);
+ flushed += fib_table_flush(net, tb, false);
}
if (flushed)
@@ -700,6 +700,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_GATEWAY:
cfg->fc_gw = nla_get_be32(attr);
break;
+ case RTA_VIA:
+ NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
+ err = -EINVAL;
+ goto errout;
case RTA_PRIORITY:
cfg->fc_priority = nla_get_u32(attr);
break;
@@ -1357,7 +1361,7 @@ static void ip_fib_net_exit(struct net *net)
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
hlist_del(&tb->tb_hlist);
- fib_table_flush(net, tb);
+ fib_table_flush(net, tb, true);
fib_free_table(tb);
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5bc0c89e81e4..3955a6d7ea66 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib_table *tb)
}
/* Caller must hold RTNL. */
-int fib_table_flush(struct net *net, struct fib_table *tb)
+int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *pn = t->kv;
@@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
- if (!fi || !(fi->fib_flags & RTNH_F_DEAD) ||
- tb->tb_id != fa->tb_id) {
+ if (!fi || tb->tb_id != fa->tb_id ||
+ (!(fi->fib_flags & RTNH_F_DEAD) &&
+ !fib_props[fa->fa_type].error)) {
+ slen = fa->fa_slen;
+ continue;
+ }
+
+ /* Do not flush error routes if network namespace is
+ * not being dismantled
+ */
+ if (!flush_all && fib_props[fa->fa_type].error) {
slen = fa->fa_slen;
continue;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 500a59906b87..854ff1e4c41f 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -120,6 +120,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
struct guehdr *guehdr;
void *data;
u16 doffset = 0;
+ u8 proto_ctype;
if (!fou)
return 1;
@@ -211,13 +212,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(guehdr->control))
return gue_control_message(skb, guehdr);
+ proto_ctype = guehdr->proto_ctype;
__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
skb_reset_transport_header(skb);
if (iptunnel_pull_offloads(skb))
goto drop;
- return -guehdr->proto_ctype;
+ return -proto_ctype;
drop:
kfree_skb(skb);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index b798862b6be5..f21ea6125fc2 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
#include <net/gre.h>
+#include <net/erspan.h>
#include <net/icmp.h>
#include <net/route.h>
@@ -118,6 +119,22 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
hdr_len += 4;
}
tpi->hdr_len = hdr_len;
+
+ /* ERSPAN ver 1 and 2 protocol sets GRE key field
+ * to 0 and sets the configured key in the
+ * inner erspan header field
+ */
+ if (greh->protocol == htons(ETH_P_ERSPAN) ||
+ greh->protocol == htons(ETH_P_ERSPAN2)) {
+ struct erspan_base_hdr *ershdr;
+
+ if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
+ return -EINVAL;
+
+ ershdr = (struct erspan_base_hdr *)options;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
+ }
+
return hdr_len;
}
EXPORT_SYMBOL(gre_parse_header);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4fe69e685569..1770ff1638bc 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -578,7 +578,8 @@ relookup_failed:
* MUST reply to only the first fragment.
*/
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+ const struct ip_options *opt)
{
struct iphdr *iph;
int room;
@@ -699,7 +700,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
- if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+ if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
goto out_unlock;
@@ -750,7 +751,7 @@ out_bh_enable:
local_bh_enable();
out:;
}
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da39446da2d..d187ee8156a1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -190,6 +190,17 @@ static void ip_ma_put(struct ip_mc_list *im)
pmc != NULL; \
pmc = rtnl_dereference(pmc->next_rcu))
+static void ip_sf_list_clear_all(struct ip_sf_list *psf)
+{
+ struct ip_sf_list *next;
+
+ while (psf) {
+ next = psf->sf_next;
+ kfree(psf);
+ psf = next;
+ }
+}
+
#ifdef CONFIG_IP_MULTICAST
/*
@@ -635,6 +646,13 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf)
}
}
+static void kfree_pmc(struct ip_mc_list *pmc)
+{
+ ip_sf_list_clear_all(pmc->sources);
+ ip_sf_list_clear_all(pmc->tomb);
+ kfree(pmc);
+}
+
static void igmpv3_send_cr(struct in_device *in_dev)
{
struct ip_mc_list *pmc, *pmc_prev, *pmc_next;
@@ -671,7 +689,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
else
in_dev->mc_tomb = pmc_next;
in_dev_put(pmc->interface);
- kfree(pmc);
+ kfree_pmc(pmc);
} else
pmc_prev = pmc;
}
@@ -1201,14 +1219,18 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
im->interface = pmc->interface;
if (im->sfmode == MCAST_INCLUDE) {
im->tomb = pmc->tomb;
+ pmc->tomb = NULL;
+
im->sources = pmc->sources;
+ pmc->sources = NULL;
+
for (psf = im->sources; psf; psf = psf->sf_next)
psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
} else {
im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
}
in_dev_put(pmc->interface);
- kfree(pmc);
+ kfree_pmc(pmc);
}
spin_unlock_bh(&im->lock);
}
@@ -1229,21 +1251,18 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
nextpmc = pmc->next;
ip_mc_clear_src(pmc);
in_dev_put(pmc->interface);
- kfree(pmc);
+ kfree_pmc(pmc);
}
/* clear dead sources, too */
rcu_read_lock();
for_each_pmc_rcu(in_dev, pmc) {
- struct ip_sf_list *psf, *psf_next;
+ struct ip_sf_list *psf;
spin_lock_bh(&pmc->lock);
psf = pmc->tomb;
pmc->tomb = NULL;
spin_unlock_bh(&pmc->lock);
- for (; psf; psf = psf_next) {
- psf_next = psf->sf_next;
- kfree(psf);
- }
+ ip_sf_list_clear_all(psf);
}
rcu_read_unlock();
}
@@ -2114,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
static void ip_mc_clear_src(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
+ struct ip_sf_list *tomb, *sources;
spin_lock_bh(&pmc->lock);
tomb = pmc->tomb;
@@ -2126,14 +2145,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
pmc->sfcount[MCAST_EXCLUDE] = 1;
spin_unlock_bh(&pmc->lock);
- for (psf = tomb; psf; psf = nextpsf) {
- nextpsf = psf->sf_next;
- kfree(psf);
- }
- for (psf = sources; psf; psf = nextpsf) {
- nextpsf = psf->sf_next;
- kfree(psf);
- }
+ ip_sf_list_clear_all(tomb);
+ ip_sf_list_clear_all(sources);
}
/* Join a multicast group
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4e5bc4b2f14e..5731670c560b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk,
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
u32 classid = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
goto errout;
@@ -998,7 +1006,9 @@ next_chunk:
if (!inet_diag_bc_sk(bc, sk))
goto next_normal;
- sock_hold(sk);
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_normal;
+
num_arr[accum] = num;
sk_arr[accum] = sk;
if (++accum == SKARR_SZ)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 760a9e52e02b..9f69411251d0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,62 @@
#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ union {
+ struct inet_skb_parm h4;
+ struct inet6_skb_parm h6;
+ };
+ struct sk_buff *next_frag;
+ int frag_run_len;
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ fragcb_clear(skb);
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+ fragcb_clear(skb);
+
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
@@ -123,6 +179,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
kmem_cache_free(f->frags_cachep, q);
}
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
return fq;
}
EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ int offset, int end)
+{
+ struct sk_buff *last = q->fragments_tail;
+
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+ * Duplicates, however, should be ignored (i.e. skb dropped, but the
+ * queue/fragments kept for later reassembly).
+ */
+ if (!last)
+ fragrun_create(q, skb); /* First fragment. */
+ else if (last->ip_defrag_offset + last->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+ if (offset < last->ip_defrag_offset + last->len)
+ return IPFRAG_OVERLAP;
+ if (offset == last->ip_defrag_offset + last->len)
+ fragrun_append_to_last(q, skb);
+ else
+ fragrun_create(q, skb);
+ } else {
+ /* Binary search. Note that skb can become the first fragment,
+ * but not the last (covered above).
+ */
+ struct rb_node **rbn, *parent;
+
+ rbn = &q->rb_fragments.rb_node;
+ do {
+ struct sk_buff *curr;
+ int curr_run_end;
+
+ parent = *rbn;
+ curr = rb_to_skb(parent);
+ curr_run_end = curr->ip_defrag_offset +
+ FRAG_CB(curr)->frag_run_len;
+ if (end <= curr->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= curr_run_end)
+ rbn = &parent->rb_right;
+ else if (offset >= curr->ip_defrag_offset &&
+ end <= curr_run_end)
+ return IPFRAG_DUP;
+ else
+ return IPFRAG_OVERLAP;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+ */
+ fragcb_clear(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+ }
+
+ skb->ip_defrag_offset = offset;
+
+ return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent)
+{
+ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+ struct sk_buff **nextp;
+ int delta;
+
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
+ if (!fp)
+ return NULL;
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(parent)->next_frag = fp;
+ else
+ rb_replace_node(&skb->rbnode, &fp->rbnode,
+ &q->rb_fragments);
+ if (q->fragments_tail == skb)
+ q->fragments_tail = fp;
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ consume_skb(head);
+ head = skb;
+ }
+ WARN_ON(head->ip_defrag_offset != 0);
+
+ delta = -head->truesize;
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ return NULL;
+
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(q->net, delta);
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments.
+ */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ return NULL;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->data_len = head->data_len - plen;
+ clone->len = clone->data_len;
+ head->truesize += clone->truesize;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(q->net, clone->truesize);
+ skb_shinfo(head)->frag_list = clone;
+ nextp = &clone->next;
+ } else {
+ nextp = &skb_shinfo(head)->frag_list;
+ }
+
+ return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data)
+{
+ struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct rb_node *rbn;
+ struct sk_buff *fp;
+
+ skb_push(head, head->data - skb_network_header(head));
+
+ /* Traverse the tree in order, to build frag_list. */
+ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ while (rbn || fp) {
+ /* fp points to the next sk_buff in the current run;
+ * rbn points to the next run.
+ */
+ /* Go through the current run. */
+ while (fp) {
+ *nextp = fp;
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ fp = FRAG_CB(fp)->next_frag;
+ }
+ /* Move to the next run. */
+ if (rbn) {
+ struct rb_node *rbnext = rb_next(rbn);
+
+ fp = rb_to_skb(rbn);
+ rb_erase(rbn, &q->rb_fragments);
+ rbn = rbnext;
+ }
+ }
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ *nextp = NULL;
+ skb_mark_not_on_list(head);
+ head->prev = NULL;
+ head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+ struct sk_buff *head;
+
+ if (q->fragments) {
+ head = q->fragments;
+ q->fragments = head->next;
+ } else {
+ struct sk_buff *skb;
+
+ head = skb_rb_first(&q->rb_fragments);
+ if (!head)
+ return NULL;
+ skb = FRAG_CB(head)->next_frag;
+ if (skb)
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ else
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+ if (head == q->fragments_tail)
+ q->fragments_tail = NULL;
+
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d757b9642d0d..be778599bfed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
+ p->n_redirects = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
* calculation of tokens is at its maximum.
*/
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 32662e9e5d21..d5984d31ab93 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -72,6 +72,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ skb->tstamp = 0;
return dst_output(net, sk, skb);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f686d7761acb..5a1d39e32196 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
- struct inet_skb_parm h;
- struct sk_buff *next_frag;
- int frag_run_len;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
- FRAG_CB(skb)->next_frag = NULL;
- FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
- struct sk_buff *skb)
-{
- RB_CLEAR_NODE(&skb->rbnode);
- FRAG_CB(skb)->next_frag = NULL;
-
- FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
- FRAG_CB(q->fragments_tail)->next_frag = skb;
- q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
- if (q->last_run_head)
- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
- &q->last_run_head->rbnode.rb_right);
- else
- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
- rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
- ip4_frag_init_run(skb);
- q->fragments_tail = skb;
- q->last_run_head = skb;
-}
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
* pull the head out of the tree in order to be able to
* deal with head->dev.
*/
- if (qp->q.fragments) {
- head = qp->q.fragments;
- qp->q.fragments = head->next;
- } else {
- head = skb_rb_first(&qp->q.rb_fragments);
- if (!head)
- goto out;
- if (FRAG_CB(head)->next_frag)
- rb_replace_node(&head->rbnode,
- &FRAG_CB(head)->next_frag->rbnode,
- &qp->q.rb_fragments);
- else
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- memset(&head->rbnode, 0, sizeof(head->rbnode));
- barrier();
- }
- if (head == qp->q.fragments_tail)
- qp->q.fragments_tail = NULL;
-
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
+ head = inet_frag_pull_head(&qp->q);
+ if (!head)
+ goto out;
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
goto out;
@@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
- struct rb_node **rbn, *parent;
- struct sk_buff *skb1, *prev_tail;
+ int ihl, end, flags, offset;
+ struct sk_buff *prev_tail;
struct net_device *dev;
unsigned int fragsize;
- int flags, offset;
- int ihl, end;
int err = -ENOENT;
u8 ecn;
@@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
*/
if (end < qp->q.len ||
((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
- goto err;
+ goto discard_qp;
qp->q.flags |= INET_FRAG_LAST_IN;
qp->q.len = end;
} else {
@@ -394,75 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
if (qp->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_qp;
qp->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_qp;
err = -ENOMEM;
if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
- goto err;
+ goto discard_qp;
err = pskb_trim_rcsum(skb, end - offset);
if (err)
- goto err;
+ goto discard_qp;
/* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- *
- * We do the same here for IPv4 (and increment an snmp counter).
- */
-
- /* Find out where to put this fragment. */
prev_tail = qp->q.fragments_tail;
- if (!prev_tail)
- ip4_frag_create_run(&qp->q, skb); /* First fragment. */
- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
- /* This is the common case: skb goes to the end. */
- /* Detect and discard overlaps. */
- if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
- goto discard_qp;
- if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
- ip4_frag_append_to_last_run(&qp->q, skb);
- else
- ip4_frag_create_run(&qp->q, skb);
- } else {
- /* Binary search. Note that skb can become the first fragment,
- * but not the last (covered above).
- */
- rbn = &qp->q.rb_fragments.rb_node;
- do {
- parent = *rbn;
- skb1 = rb_to_skb(parent);
- if (end <= skb1->ip_defrag_offset)
- rbn = &parent->rb_left;
- else if (offset >= skb1->ip_defrag_offset +
- FRAG_CB(skb1)->frag_run_len)
- rbn = &parent->rb_right;
- else /* Found an overlap with skb1. */
- goto discard_qp;
- } while (*rbn);
- /* Here we have parent properly set, and rbn pointing to
- * one of its NULL left/right children. Insert skb.
- */
- ip4_frag_init_run(skb);
- rb_link_node(&skb->rbnode, parent, rbn);
- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
- }
+ err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+ if (err)
+ goto insert_error;
if (dev)
qp->iif = dev->ifindex;
- skb->ip_defrag_offset = offset;
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
@@ -487,16 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
skb->_skb_refdst = 0UL;
err = ip_frag_reasm(qp, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
+ if (err)
+ inet_frag_kill(&qp->q);
return err;
}
skb_dst_drop(skb);
return -EINPROGRESS;
-discard_qp:
- inet_frag_kill(&qp->q);
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
err = -EINVAL;
__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+discard_qp:
+ inet_frag_kill(&qp->q);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
kfree_skb(skb);
return err;
@@ -508,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct iphdr *iph;
- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
- struct sk_buff **nextp; /* To build frag_list. */
- struct rb_node *rbn;
- int len;
- int ihlen;
- int delta;
- int err;
+ void *reasm_data;
+ int len, err;
u8 ecn;
ipq_kill(qp);
@@ -524,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
err = -EINVAL;
goto out_fail;
}
- /* Make the one we just received the head. */
- if (head != skb) {
- fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- goto out_nomem;
- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
- if (RB_EMPTY_NODE(&skb->rbnode))
- FRAG_CB(prev_tail)->next_frag = fp;
- else
- rb_replace_node(&skb->rbnode, &fp->rbnode,
- &qp->q.rb_fragments);
- if (qp->q.fragments_tail == skb)
- qp->q.fragments_tail = fp;
- skb_morph(skb, head);
- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
- rb_replace_node(&head->rbnode, &skb->rbnode,
- &qp->q.rb_fragments);
- consume_skb(head);
- head = skb;
- }
-
- WARN_ON(head->ip_defrag_offset != 0);
- /* Allocate a new buffer for the datagram. */
- ihlen = ip_hdrlen(head);
- len = ihlen + qp->q.len;
+ /* Make the one we just received the head. */
+ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_nomem;
+ len = ip_hdrlen(skb) + qp->q.len;
err = -E2BIG;
if (len > 65535)
goto out_oversize;
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_nomem;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(qp->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_nomem;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->truesize += clone->truesize;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(qp->q.net, clone->truesize);
- skb_shinfo(head)->frag_list = clone;
- nextp = &clone->next;
- } else {
- nextp = &skb_shinfo(head)->frag_list;
- }
-
- skb_push(head, head->data - skb_network_header(head));
+ inet_frag_reasm_finish(&qp->q, skb, reasm_data);
- /* Traverse the tree in order, to build frag_list. */
- fp = FRAG_CB(head)->next_frag;
- rbn = rb_next(&head->rbnode);
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- while (rbn || fp) {
- /* fp points to the next sk_buff in the current run;
- * rbn points to the next run.
- */
- /* Go through the current run. */
- while (fp) {
- *nextp = fp;
- nextp = &fp->next;
- fp->prev = NULL;
- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
- fp->sk = NULL;
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp = FRAG_CB(fp)->next_frag;
- }
- /* Move to the next run. */
- if (rbn) {
- struct rb_node *rbnext = rb_next(rbn);
+ skb->dev = dev;
+ IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
- fp = rb_to_skb(rbn);
- rb_erase(rbn, &qp->q.rb_fragments);
- rbn = rbnext;
- }
- }
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
- *nextp = NULL;
- head->next = NULL;
- head->prev = NULL;
- head->dev = dev;
- head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
- iph = ip_hdr(head);
+ iph = ip_hdr(skb);
iph->tot_len = htons(len);
iph->tos |= ecn;
@@ -647,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
* from one very small df-fragment and one large non-df frag.
*/
if (qp->max_df_size == qp->q.max_size) {
- IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+ IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
iph->frag_off = htons(IP_DF);
} else {
iph->frag_off = 0;
@@ -745,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);
-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
- struct rb_node *p = rb_first(root);
- unsigned int sum = 0;
-
- while (p) {
- struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
- p = rb_next(p);
- rb_erase(&skb->rbnode, root);
- while (skb) {
- struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
- sum += skb->truesize;
- kfree_skb(skb);
- skb = next;
- }
- }
- return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
#ifdef CONFIG_SYSCTL
static int dist_min;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8cce0e9ea08c..3c734832bb7c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -260,7 +260,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -269,20 +268,11 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
int len;
itn = net_generic(net, erspan_net_id);
- len = gre_hdr_len + sizeof(*ershdr);
-
- /* Check based hdr len */
- if (unlikely(!pskb_may_pull(skb, len)))
- return PACKET_REJECT;
iph = ip_hdr(skb);
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
- /* The original GRE header does not have key field,
- * Use ERSPAN 10-bit session ID as key.
- */
- tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
@@ -292,9 +282,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb,
len,
htons(ETH_P_TEB),
@@ -302,8 +289,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
goto drop;
if (tunnel->collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -316,6 +304,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
md->version = ver;
md2 = &md->u.md2;
@@ -570,8 +566,7 @@ err_free_skb:
dev->stats.tx_dropped++;
}
-static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
- __be16 proto)
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
@@ -579,10 +574,10 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
struct erspan_metadata *md;
struct rtable *rt = NULL;
bool truncate = false;
+ __be16 df, proto;
struct flowi4 fl;
int tunnel_hlen;
int version;
- __be16 df;
int nhoff;
int thoff;
@@ -627,18 +622,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
+ proto = htons(ETH_P_ERSPAN);
} else if (version == 2) {
erspan_build_header_v2(skb,
ntohl(tunnel_id_to_key32(key->tun_id)),
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, true);
+ proto = htons(ETH_P_ERSPAN2);
} else {
goto err_free_rt;
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(tunnel->o_seqno++));
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -677,6 +674,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
@@ -719,9 +719,13 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false;
+ __be16 proto;
+
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
if (tunnel->collect_md) {
- erspan_fb_xmit(skb, dev, skb->protocol);
+ erspan_fb_xmit(skb, dev);
return NETDEV_TX_OK;
}
@@ -737,19 +741,22 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
}
/* Push ERSPAN header */
- if (tunnel->erspan_ver == 1)
+ if (tunnel->erspan_ver == 1) {
erspan_build_header(skb, ntohl(tunnel->parms.o_key),
tunnel->index,
truncate, true);
- else if (tunnel->erspan_ver == 2)
+ proto = htons(ETH_P_ERSPAN);
+ } else if (tunnel->erspan_ver == 2) {
erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
tunnel->dir, tunnel->hwid,
truncate, true);
- else
+ proto = htons(ETH_P_ERSPAN2);
+ } else {
goto free_skb;
+ }
tunnel->parms.o_flags &= ~TUNNEL_KEY;
- __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+ __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
return NETDEV_TX_OK;
free_skb:
@@ -763,6 +770,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
@@ -1457,12 +1467,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+ if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
+ !t->collect_md)
+ o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 27c863f6dd83..c3a0683e83df 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -258,11 +258,10 @@ int ip_local_deliver(struct sk_buff *skb)
ip_local_deliver_finish);
}
-static inline bool ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
struct ip_options *opt;
const struct iphdr *iph;
- struct net_device *dev = skb->dev;
/* It looks as overkill, because not all
IP options require packet mangling.
@@ -298,7 +297,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
}
}
- if (ip_options_rcv_srr(skb))
+ if (ip_options_rcv_srr(skb, dev))
goto drop;
}
@@ -308,11 +307,10 @@ drop:
}
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
- struct net_device *dev = skb->dev;
struct rtable *rt;
int err;
@@ -355,7 +353,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
}
#endif
- if (iph->ihl > 5 && ip_rcv_options(skb))
+ if (iph->ihl > 5 && ip_rcv_options(skb, dev))
goto drop;
rt = skb_rtable(skb);
@@ -401,6 +399,7 @@ drop_error:
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ struct net_device *dev = skb->dev;
int ret;
/* if ingress device is enslaved to an L3 master device pass the
@@ -410,7 +409,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!skb)
return NET_RX_SUCCESS;
- ret = ip_rcv_finish_core(net, sk, skb);
+ ret = ip_rcv_finish_core(net, sk, skb, dev);
if (ret != NET_RX_DROP)
ret = dst_input(skb);
return ret;
@@ -489,6 +488,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
goto drop;
}
+ iph = ip_hdr(skb);
skb->transport_header = skb->network_header + iph->ihl*4;
/* Remove any debris in the socket control block */
@@ -549,6 +549,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
+ struct net_device *dev = skb->dev;
struct dst_entry *dst;
skb_list_del_init(skb);
@@ -558,7 +559,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip_rcv(skb);
if (!skb)
continue;
- if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+ if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
continue;
dst = skb_dst(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d46c00e..3db31bb9df50 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
* If opt == NULL, then skb->data should point to IP header.
*/
-int ip_options_compile(struct net *net,
- struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+ struct ip_options *opt, struct sk_buff *skb,
+ __be32 *info)
{
__be32 spec_dst = htonl(INADDR_ANY);
unsigned char *pp_ptr = NULL;
@@ -468,11 +469,22 @@ eol:
return 0;
error:
- if (skb) {
- icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
- }
+ if (info)
+ *info = htonl((pp_ptr-iph)<<24);
return -EINVAL;
}
+
+int ip_options_compile(struct net *net,
+ struct ip_options *opt, struct sk_buff *skb)
+{
+ int ret;
+ __be32 info;
+
+ ret = __ip_options_compile(net, opt, skb, &info);
+ if (ret != 0 && skb)
+ icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+ return ret;
+}
EXPORT_SYMBOL(ip_options_compile);
/*
@@ -600,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb)
}
}
-int ip_options_rcv_srr(struct sk_buff *skb)
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
{
struct ip_options *opt = &(IPCB(skb)->opt);
int srrspace, srrptr;
@@ -635,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
orefdst = skb->_skb_refdst;
skb_dst_set(skb, NULL);
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+ err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9c4e72e9c60a..73894ed12a70 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
+ to->skb_iif = from->skb_iif;
skb_dst_drop(to);
skb_dst_copy(to, from);
to->dev = from->dev;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc..b7a26120d552 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -148,19 +148,17 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
+ __be16 _ports[2], *ports;
struct sockaddr_in sin;
- __be16 *ports;
- int end;
-
- end = skb_transport_offset(skb) + 4;
- if (end > 0 && !pskb_may_pull(skb, end))
- return;
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
- ports = (__be16 *)skb_transport_header(skb);
+ ports = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_ports), &_ports);
+ if (!ports)
+ return;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 284a22154b4e..c4f5602308ed 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- unsigned int inner_nhdr_len = 0;
const struct iphdr *inner_iph;
struct flowi4 fl4;
u8 tos, ttl;
@@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
__be32 dst;
bool connected;
- /* ensure we can access the inner net header, for several users below */
- if (skb->protocol == htons(ETH_P_IP))
- inner_nhdr_len = sizeof(struct iphdr);
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner_nhdr_len = sizeof(struct ipv6hdr);
- if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
- goto tx_error;
-
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index f38cb21d773d..808f8d15c519 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -74,6 +74,33 @@ drop:
return 0;
}
+static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
+{
+ struct ip_tunnel *tunnel;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
+ if (tunnel) {
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+
+ skb->dev = tunnel->dev;
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+ }
+
+ return -EINVAL;
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
static int vti_rcv(struct sk_buff *skb)
{
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
@@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb)
return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
}
+static int vti_rcv_ipip(struct sk_buff *skb)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
+}
+
static int vti_rcv_cb(struct sk_buff *skb, int err)
{
unsigned short family;
@@ -241,6 +276,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct flowi fl;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
@@ -253,15 +291,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
break;
default:
- dev->stats.tx_errors++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
+ goto tx_err;
}
/* override mark with tunnel output key */
fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
return vti_xmit(skb, dev, &fl);
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int vti4_err(struct sk_buff *skb, u32 info)
@@ -429,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
.priority = 100,
};
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+ .handler = vti_rcv_ipip,
+ .err_handler = vti4_err,
+ .priority = 0,
+};
+
static int __net_init vti_init_net(struct net *net)
{
int err;
@@ -597,6 +644,13 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
+ msg = "ipip tunnel";
+ err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ if (err < 0) {
+ pr_info("%s: cant't register tunnel\n",__func__);
+ goto xfrm_tunnel_failed;
+ }
+
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
if (err < 0)
@@ -605,6 +659,8 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel_failed:
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
@@ -620,6 +676,7 @@ pernet_dev_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5660adcf7a04..f6275aa19b6a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -69,6 +69,8 @@
#include <net/nexthop.h>
#include <net/switchdev.h>
+#include <linux/nospec.h>
+
struct ipmr_rule {
struct fib_rule common;
};
@@ -1612,6 +1614,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
return -EFAULT;
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
+ vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
@@ -1686,6 +1689,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return -EFAULT;
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
+ vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313ae216..2fa196325988 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -56,18 +56,15 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu;
-
+ struct rcu_head rcu; /* for call_rcu_bh */
+ struct net *net; /* netns for pernet list */
char ifname[IFNAMSIZ]; /* device ifname */
- struct notifier_block notifier; /* refresh c->ifindex in it */
};
#ifdef CONFIG_PROC_FS
static const struct file_operations clusterip_proc_fops;
#endif
-static unsigned int clusterip_net_id __read_mostly;
-
struct clusterip_net {
struct list_head configs;
/* lock protects the configs list */
@@ -75,19 +72,35 @@ struct clusterip_net {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
+ /* mutex protects the config->pde*/
+ struct mutex mutex;
#endif
};
+static unsigned int clusterip_net_id __read_mostly;
+static inline struct clusterip_net *clusterip_pernet(struct net *net)
+{
+ return net_generic(net, clusterip_net_id);
+}
+
static inline void
clusterip_config_get(struct clusterip_config *c)
{
refcount_inc(&c->refcount);
}
-
static void clusterip_config_rcu_free(struct rcu_head *head)
{
- kfree(container_of(head, struct clusterip_config, rcu));
+ struct clusterip_config *config;
+ struct net_device *dev;
+
+ config = container_of(head, struct clusterip_config, rcu);
+ dev = dev_get_by_name(config->net, config->ifname);
+ if (dev) {
+ dev_mc_del(dev, config->clustermac);
+ dev_put(dev);
+ }
+ kfree(config);
}
static inline void
@@ -101,25 +114,24 @@ clusterip_config_put(struct clusterip_config *c)
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
-clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
+clusterip_config_entry_put(struct clusterip_config *c)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(c->net);
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
+ mutex_lock(&cn->mutex);
if (cn->procdir)
proc_remove(c->pde);
+ mutex_unlock(&cn->mutex);
#endif
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
-
- unregister_netdevice_notifier(&c->notifier);
-
return;
}
local_bh_enable();
@@ -129,7 +141,7 @@ static struct clusterip_config *
__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
@@ -181,32 +193,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
- c = container_of(this, struct clusterip_config, notifier);
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- }
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- } else if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
+ spin_lock_bh(&cn->lock);
+ list_for_each_entry_rcu(c, &cn->configs, list) {
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ }
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ } else if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
}
- break;
}
+ spin_unlock_bh(&cn->lock);
return NOTIFY_DONE;
}
@@ -215,30 +232,44 @@ static struct clusterip_config *
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
__be32 ip, const char *iniface)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
+ struct net_device *dev;
int err;
+ if (iniface[0] == '\0') {
+ pr_info("Please specify an interface name\n");
+ return ERR_PTR(-EINVAL);
+ }
+
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return ERR_PTR(-ENOMEM);
- strcpy(c->ifname, iniface);
- c->ifindex = -1;
- c->clusterip = ip;
+ dev = dev_get_by_name(net, iniface);
+ if (!dev) {
+ pr_info("no such interface %s\n", iniface);
+ kfree(c);
+ return ERR_PTR(-ENOENT);
+ }
+ c->ifindex = dev->ifindex;
+ strcpy(c->ifname, dev->name);
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+ dev_mc_add(dev, c->clustermac);
+ dev_put(dev);
+
+ c->clusterip = ip;
c->num_total_nodes = i->num_total_nodes;
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
+ c->net = net;
refcount_set(&c->refcount, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
- spin_unlock_bh(&cn->lock);
- kfree(c);
-
- return ERR_PTR(-EBUSY);
+ err = -EBUSY;
+ goto out_config_put;
}
list_add_rcu(&c->list, &cn->configs);
@@ -250,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
+ mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
+ mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
goto err;
@@ -260,22 +293,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
}
#endif
- c->notifier.notifier_call = clusterip_netdev_event;
- err = register_netdevice_notifier(&c->notifier);
- if (!err) {
- refcount_set(&c->entries, 1);
- return c;
- }
+ refcount_set(&c->entries, 1);
+ return c;
#ifdef CONFIG_PROC_FS
- proc_remove(c->pde);
err:
#endif
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
+out_config_put:
spin_unlock_bh(&cn->lock);
clusterip_config_put(c);
-
return ERR_PTR(err);
}
@@ -475,34 +503,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
&e->ip.dst.s_addr);
return -EINVAL;
} else {
- struct net_device *dev;
-
- if (e->ip.iniface[0] == '\0') {
- pr_info("Please specify an interface name\n");
- return -EINVAL;
- }
-
- dev = dev_get_by_name(par->net, e->ip.iniface);
- if (!dev) {
- pr_info("no such interface %s\n",
- e->ip.iniface);
- return -ENOENT;
- }
- dev_put(dev);
-
config = clusterip_config_init(par->net, cipinfo,
e->ip.dst.s_addr,
e->ip.iniface);
if (IS_ERR(config))
return PTR_ERR(config);
}
- }
+ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+ return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
- clusterip_config_entry_put(par->net, config);
+ clusterip_config_entry_put(config);
clusterip_config_put(config);
return ret;
}
@@ -524,7 +538,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
- clusterip_config_entry_put(par->net, cipinfo->config);
+ clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
@@ -806,7 +820,7 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
int ret;
INIT_LIST_HEAD(&cn->configs);
@@ -824,6 +838,7 @@ static int clusterip_net_init(struct net *net)
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
+ mutex_init(&cn->mutex);
#endif /* CONFIG_PROC_FS */
return 0;
@@ -831,13 +846,15 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
#ifdef CONFIG_PROC_FS
+ struct clusterip_net *cn = clusterip_pernet(net);
+
+ mutex_lock(&cn->mutex);
proc_remove(cn->procdir);
cn->procdir = NULL;
+ mutex_unlock(&cn->mutex);
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
- WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
@@ -847,6 +864,10 @@ static struct pernet_operations clusterip_net_ops = {
.size = sizeof(struct clusterip_net),
};
+struct notifier_block cip_netdev_notifier = {
+ .notifier_call = clusterip_netdev_event
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
@@ -859,11 +880,17 @@ static int __init clusterip_tg_init(void)
if (ret < 0)
goto cleanup_subsys;
+ ret = register_netdevice_notifier(&cip_netdev_notifier);
+ if (ret < 0)
+ goto unregister_target;
+
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
return 0;
+unregister_target:
+ xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
unregister_pernet_subsys(&clusterip_net_ops);
return ret;
@@ -873,6 +900,7 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+ unregister_netdevice_notifier(&cip_netdev_notifier);
xt_unregister_target(&clusterip_tg_reg);
unregister_pernet_subsys(&clusterip_net_ops);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index ac110c1d55b5..481437fc1eb2 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -104,6 +104,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
int snmp_version(void *context, size_t hdrlen, unsigned char tag,
const void *data, size_t datalen)
{
+ if (datalen != 1)
+ return -EINVAL;
if (*(unsigned char *)data > 1)
return -ENOTSUPP;
return 1;
@@ -113,8 +115,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
const void *data, size_t datalen)
{
struct snmp_ctx *ctx = (struct snmp_ctx *)context;
- __be32 *pdata = (__be32 *)data;
+ __be32 *pdata;
+ if (datalen != 4)
+ return -EINVAL;
+ pdata = (__be32 *)data;
if (*pdata == ctx->from) {
pr_debug("%s: %pI4 to %pI4\n", __func__,
(void *)&ctx->from, (void *)&ctx->to);
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
index f86bb4f06609..d8e3a1fb8e82 100644
--- a/net/ipv4/netlink.c
+++ b/net/ipv4/netlink.c
@@ -3,9 +3,10 @@
#include <linux/types.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
+#include <linux/in6.h>
#include <net/ip.h>
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
struct netlink_ext_ack *extack)
{
*ip_proto = nla_get_u8(attr);
@@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
switch (*ip_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
+ return 0;
case IPPROTO_ICMP:
+ if (family != AF_INET)
+ break;
+ return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ if (family != AF_INET6)
+ break;
return 0;
- default:
- NL_SET_ERR_MSG(extack, "Unsupported ip proto");
- return -EOPNOTSUPP;
+#endif
}
+ NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+ return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33df4d76db2d..711a5c75bd4b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -174,6 +174,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
int sdif = inet_sdif(skb);
+ int dif = inet_iif(skb);
struct sock *sk;
struct hlist_head *head;
int delivered = 0;
@@ -186,8 +187,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
net = dev_net(skb->dev);
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex, sdif);
+ iph->saddr, iph->daddr, dif, sdif);
while (sk) {
delivered = 1;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8501554e96a4..232581c140a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -500,15 +500,17 @@ EXPORT_SYMBOL(ip_idents_reserve);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
- static u32 ip_idents_hashrnd __read_mostly;
u32 hash, id;
- net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
+ /* Note the following code is not safe, but this is okay. */
+ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+ get_random_bytes(&net->ipv4.ip_id_key,
+ sizeof(net->ipv4.ip_id_key));
- hash = jhash_3words((__force u32)iph->daddr,
+ hash = siphash_3u32((__force u32)iph->daddr,
(__force u32)iph->saddr,
- iph->protocol ^ net_hash_mix(net),
- ip_idents_hashrnd);
+ iph->protocol,
+ &net->ipv4.ip_id_key);
id = ip_idents_reserve(hash, segs);
iph->id = htons(id);
}
@@ -887,13 +889,15 @@ void ip_rt_send_redirect(struct sk_buff *skb)
/* No redirected packets during ip_rt_redirect_silence;
* reset the algorithm.
*/
- if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+ if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
peer->rate_tokens = 0;
+ peer->n_redirects = 0;
+ }
/* Too many ignored redirects; do not send anything
* set dst.rate_last to the last seen redirected packet.
*/
- if (peer->rate_tokens >= ip_rt_redirect_number) {
+ if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
goto out_put_peer;
}
@@ -910,6 +914,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->rate_tokens;
+ ++peer->n_redirects;
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (log_martians &&
peer->rate_tokens == ip_rt_redirect_number)
@@ -1182,11 +1187,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
return dst;
}
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
+{
+ struct ip_options opt;
+ int res;
+
+ /* Recompile ip options since IPCB may not be valid anymore.
+ * Also check we have a reasonable ipv4 header.
+ */
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+ ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+ return;
+
+ memset(&opt, 0, sizeof(opt));
+ if (ip_hdr(skb)->ihl > 5) {
+ if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+ return;
+ opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
+ return;
+ }
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
static void ipv4_link_failure(struct sk_buff *skb)
{
struct rtable *rt;
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ ipv4_send_dest_unreach(skb);
rt = skb_rtable(skb);
if (rt)
@@ -1305,6 +1338,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
if (fnhe->fnhe_daddr == daddr) {
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ /* set fnhe_daddr to 0 to ensure it won't bind with
+ * new dsts in rt_bind_exception().
+ */
+ fnhe->fnhe_daddr = 0;
fnhe_flush_routes(fnhe);
kfree_rcu(fnhe, rcu);
break;
@@ -1923,7 +1960,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u32 itag = 0;
struct rtable *rth;
struct flowi4 fl4;
- bool do_cache;
+ bool do_cache = true;
/* IP on this device is disabled. */
@@ -2000,6 +2037,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type == RTN_BROADCAST) {
if (IN_DEV_BFORWARD(in_dev))
goto make_route;
+ /* not do cache if bc_forwarding is enabled */
+ if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ do_cache = false;
goto brd_input;
}
@@ -2037,16 +2077,13 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
- do_cache = false;
- if (res->fi) {
- if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
- if (rt_cache_valid(rth)) {
- skb_dst_set_noref(skb, &rth->dst);
- err = 0;
- goto out;
- }
- do_cache = true;
+ do_cache &= res->fi && !itag;
+ if (do_cache) {
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rt_cache_valid(rth)) {
+ skb_dst_set_noref(skb, &rth->dst);
+ err = 0;
+ goto out;
}
}
@@ -2152,12 +2189,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int our = 0;
int err = -EINVAL;
- if (in_dev)
- our = ip_check_mc_rcu(in_dev, daddr, saddr,
- ip_hdr(skb)->protocol);
+ if (!in_dev)
+ return err;
+ our = ip_check_mc_rcu(in_dev, daddr, saddr,
+ ip_hdr(skb)->protocol);
/* check l3 master if no match yet */
- if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+ if (!our && netif_is_l3_slave(dev)) {
struct in_device *l3_in_dev;
l3_in_dev = __in_dev_get_rcu(skb->dev);
@@ -2811,7 +2849,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (tb[RTA_IP_PROTO]) {
err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
- &ip_proto, extack);
+ &ip_proto, AF_INET, extack);
if (err)
return err;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c3387dfd725b..f66b2e6d97a7 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -216,7 +216,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
- inet_csk_reqsk_queue_add(sk, req, child);
+ if (!inet_csk_reqsk_queue_add(sk, req, child)) {
+ bh_unlock_sock(child);
+ sock_put(child);
+ child = NULL;
+ reqsk_put(req);
+ }
} else {
reqsk_free(req);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 891ed2f91467..ce64453d337d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1140,7 +1141,9 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one_day_secs
},
{
.procname = "tcp_autocorking",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a32a0f4cc138..30c6e94b06c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
flags = msg->msg_flags;
if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
- if (sk->sk_state != TCP_ESTABLISHED) {
+ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
err = -EINVAL;
goto out_err;
}
@@ -1901,6 +1901,11 @@ static int tcp_inq_hint(struct sock *sk)
inq = tp->rcv_nxt - tp->copied_seq;
release_sock(sk);
}
+ /* After receiving a FIN, tell the user-space to continue reading
+ * by returning a non-zero inq.
+ */
+ if (inq == 0 && sock_flag(sk, SOCK_DONE))
+ inq = 1;
return inq;
}
@@ -2519,6 +2524,7 @@ void tcp_write_queue_purge(struct sock *sk)
sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
tcp_sk(sk)->packets_out = 0;
+ inet_csk(sk)->icsk_backoff = 0;
}
int tcp_disconnect(struct sock *sk, int flags)
@@ -2567,7 +2573,6 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->write_seq += tp->max_window + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
- icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ca61e2a659e7..5205c5a5d8d5 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -66,11 +66,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
module_param(dctcp_alpha_on_init, uint, 0644);
MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
- "parameter for clamping alpha on loss");
-
static struct tcp_congestion_ops dctcp_reno;
static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
@@ -211,21 +206,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
}
}
-static void dctcp_state(struct sock *sk, u8 new_state)
+static void dctcp_react_to_loss(struct sock *sk)
{
- if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
- struct dctcp *ca = inet_csk_ca(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- /* If this extension is enabled, we clamp dctcp_alpha to
- * max on packet loss; the motivation is that dctcp_alpha
- * is an indicator to the extend of congestion and packet
- * loss is an indicator of extreme congestion; setting
- * this in practice turned out to be beneficial, and
- * effectively assumes total congestion which reduces the
- * window by half.
- */
- ca->dctcp_alpha = DCTCP_MAX_ALPHA;
- }
+ ca->loss_cwnd = tp->snd_cwnd;
+ tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+ if (new_state == TCP_CA_Recovery &&
+ new_state != inet_csk(sk)->icsk_ca_state)
+ dctcp_react_to_loss(sk);
+ /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ * one loss-adjustment per RTT.
+ */
}
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
@@ -237,6 +234,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
case CA_EVENT_ECN_NO_CE:
dctcp_ce_state_1_to_0(sk);
break;
+ case CA_EVENT_LOSS:
+ dctcp_react_to_loss(sk);
+ break;
default:
/* Don't care for the rest. */
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 664fa7d8f7d9..cfdd70e32755 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int room;
+
+ room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
/* Check #1 */
- if (tp->rcv_ssthresh < tp->window_clamp &&
- (int)tp->rcv_ssthresh < tcp_space(sk) &&
- !tcp_under_memory_pressure(sk)) {
+ if (room > 0 && !tcp_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
- tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
- tp->window_clamp);
+ tp->rcv_ssthresh += min(room, incr);
inet_csk(sk)->icsk_ack.quick |= 1;
}
}
@@ -6514,7 +6514,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
/* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+ if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
+ reqsk_fastopen_remove(fastopen_sk, req, false);
+ bh_unlock_sock(fastopen_sk);
+ sock_put(fastopen_sk);
+ reqsk_put(req);
+ goto drop;
+ }
sk->sk_data_ready(sk);
bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c78c22ea0be1..2b7205ad261a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -536,14 +536,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (sock_owned_by_user(sk))
break;
+ skb = tcp_rtx_queue_head(sk);
+ if (WARN_ON_ONCE(!skb))
+ break;
+
icsk->icsk_backoff--;
icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
- skb = tcp_rtx_queue_head(sk);
- BUG_ON(!skb);
-
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
remaining = icsk->icsk_rto -
@@ -1651,15 +1652,8 @@ EXPORT_SYMBOL(tcp_add_backlog);
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = (struct tcphdr *)skb->data;
- unsigned int eaten = skb->len;
- int err;
- err = sk_filter_trim_cap(sk, skb, th->doff * 4);
- if (!err) {
- eaten -= skb->len;
- TCP_SKB_CB(skb)->end_seq -= eaten;
- }
- return err;
+ return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
EXPORT_SYMBOL(tcp_filter);
@@ -2502,7 +2496,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
{
int cpu;
- module_put(net->ipv4.tcp_congestion_control->owner);
+ if (net->ipv4.tcp_congestion_control)
+ module_put(net->ipv4.tcp_congestion_control->owner);
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 57eae8d70ba1..b1b5a648def6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -224,7 +224,7 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits) {
dst_negative_advice(sk);
- } else if (!tp->syn_data && !tp->syn_fastopen) {
+ } else {
sk_rethink_txhash(sk);
}
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f8183fdce5b2..e45a5e19e509 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -785,15 +785,23 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize)
+ if (hlen + cork->gso_size > cork->fragsize) {
+ kfree_skb(skb);
return -EINVAL;
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ }
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
return -EINVAL;
- if (sk->sk_no_check_tx)
+ }
+ if (sk->sk_no_check_tx) {
+ kfree_skb(skb);
return -EINVAL;
+ }
if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb)))
+ dst_xfrm(skb_dst(skb))) {
+ kfree_skb(skb);
return -EIO;
+ }
skb_shinfo(skb)->gso_size = cork->gso_size;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d73a6d6652f6..2b144b92ae46 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -111,7 +111,8 @@ static void
_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
{
const struct iphdr *iph = ip_hdr(skb);
- u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+ int ihl = iph->ihl;
+ u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
@@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_mark = skb->mark;
fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
+ fl4->flowi4_proto = iph->protocol;
+ fl4->daddr = reverse ? iph->saddr : iph->daddr;
+ fl4->saddr = reverse ? iph->daddr : iph->saddr;
+ fl4->flowi4_tos = iph->tos;
+
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
case IPPROTO_UDP:
@@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ports;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ports = (__be16 *)xprth;
fl4->fl4_sport = ports[!!reverse];
@@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 2 - skb->data)) {
u8 *icmp;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
icmp = xprth;
fl4->fl4_icmp_type = icmp[0];
@@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be32 *ehdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ehdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ehdr[0];
@@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 8 - skb->data)) {
__be32 *ah_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ah_hdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ah_hdr[1];
@@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ipcomp_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ipcomp_hdr = (__be16 *)xprth;
fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
@@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
__be16 *greflags;
__be32 *gre_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
greflags = (__be16 *)xprth;
gre_hdr = (__be32 *)xprth;
@@ -213,10 +219,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
break;
}
}
- fl4->flowi4_proto = iph->protocol;
- fl4->daddr = reverse ? iph->saddr : iph->daddr;
- fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4e81ff2f4588..c57efd5c5b38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1148,7 +1148,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa == ifp)
continue;
- if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ if (ifa->prefix_len != ifp->prefix_len ||
+ !ipv6_prefix_equal(&ifa->addr, &ifp->addr,
ifp->prefix_len))
continue;
if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
@@ -4711,8 +4712,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
idev = ipv6_find_idev(dev);
- if (IS_ERR(idev))
- return PTR_ERR(idev);
+ if (!idev)
+ return -ENOBUFS;
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..79fcd9550fd2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -309,6 +309,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Check if the address belongs to the host. */
if (addr_type == IPV6_ADDR_MAPPED) {
+ struct net_device *dev = NULL;
int chk_addr_ret;
/* Binding to v4-mapped address on a v6-only socket
@@ -319,9 +320,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
goto out;
}
+ rcu_read_lock();
+ if (sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
/* Reproduce AF_INET checks to make the bindings consistent */
v4addr = addr->sin6_addr.s6_addr32[3];
- chk_addr_ret = inet_addr_type(net, v4addr);
+ chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
+ rcu_read_unlock();
+
if (!inet_can_nonlocal_bind(net, inet) &&
v4addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
@@ -349,6 +361,9 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = -EINVAL;
goto out_unlock;
}
+ }
+
+ if (sk->sk_bound_dev_if) {
dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1ede7a16a0be..cb24850d2c7f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -341,6 +341,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
iph->daddr = fl6->daddr;
+ ip6_flow_hdr(iph, 0, 0);
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
@@ -700,17 +701,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
if (np->rxopt.bits.rxorigdstaddr) {
struct sockaddr_in6 sin6;
- __be16 *ports;
- int end;
+ __be16 _ports[2], *ports;
- end = skb_transport_offset(skb) + 4;
- if (end <= 0 || pskb_may_pull(skb, end)) {
+ ports = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_ports), &_ports);
+ if (ports) {
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
- ports = (__be16 *)skb_transport_header(skb);
-
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ipv6_hdr(skb)->daddr;
sin6.sin6_port = ports[1];
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 88a7579c23bd..a7d996148eed 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -293,7 +293,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb->len += tailen;
skb->data_len += tailen;
skb->truesize += tailen;
- if (sk)
+ if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c9c53ade55c3..6d14cbe443f8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -421,10 +421,10 @@ static int icmp6_iif(const struct sk_buff *skb)
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
const struct in6_addr *force_saddr)
{
- struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
+ struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct dst_entry *dst;
@@ -435,12 +435,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ u32 mark;
if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
return;
+ if (!skb->dev)
+ return;
+ net = dev_net(skb->dev);
+ mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
* i.e. RFC 1885 2.4(e)
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 17c455ff69ff..7858fa9ea103 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -420,6 +420,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
done:
rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
return ret;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cbe46175bb59..a6c0479c1d55 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -877,6 +877,12 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
{
int cpu;
+ /* Make sure rt6_make_pcpu_route() wont add other percpu routes
+ * while we are cleaning them here.
+ */
+ f6i->fib6_destroying = 1;
+ mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
/* release the reference to this fib entry from
* all of its cached pcpu routes
*/
@@ -889,9 +895,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
if (pcpu_rt) {
struct fib6_info *from;
- from = rcu_dereference_protected(pcpu_rt->from,
- lockdep_is_held(&table->tb6_lock));
- rcu_assign_pointer(pcpu_rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
fib6_info_release(from);
}
}
@@ -902,6 +906,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
{
struct fib6_table *table = rt->fib6_table;
+ if (rt->rt6i_pcpu)
+ fib6_drop_pcpu_from(rt, table);
+
if (atomic_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -923,9 +930,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&table->tb6_lock));
}
-
- if (rt->rt6i_pcpu)
- fib6_drop_pcpu_from(rt, table);
}
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index cb54a8a3c273..be5f3d7ceb96 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}
+static void fl_free_rcu(struct rcu_head *head)
+{
+ struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
+ kfree(fl->opt);
+ kfree(fl);
+}
+
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl) {
- if (fl->share == IPV6_FL_S_PROCESS)
- put_pid(fl->owner.pid);
- kfree(fl->opt);
- kfree_rcu(fl, rcu);
- }
+ if (fl)
+ call_rcu(&fl->rcu, fl_free_rcu);
}
static void fl_release(struct ip6_flowlabel *fl)
@@ -633,9 +639,9 @@ recheck:
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid == fl->owner.pid)) ||
+ (fl1->owner.pid != fl->owner.pid)) ||
((fl1->share == IPV6_FL_S_USER) &&
- uid_eq(fl1->owner.uid, fl->owner.uid)))
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e493b041d4ac..01ecd510014f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -540,23 +540,18 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
return PACKET_REJECT;
}
-static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
- struct tnl_ptk_info *tpi)
+static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
const struct ipv6hdr *ipv6h;
struct erspan_md2 *md2;
struct ip6_tnl *tunnel;
u8 ver;
- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
- return PACKET_REJECT;
-
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
- tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip6gre_tunnel_lookup(skb->dev,
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -567,18 +562,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb, len,
htons(ETH_P_TEB),
false, false) < 0)
return PACKET_REJECT;
if (tunnel->parms.collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -591,6 +584,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
@@ -627,7 +628,7 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
goto out;
}
@@ -897,6 +898,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -935,10 +939,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__u8 dsfield = false;
struct flowi6 fl6;
int err = -EINVAL;
+ __be16 proto;
__u32 mtu;
int nhoff;
int thoff;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -1011,8 +1019,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1020,7 +1026,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
&dsfield, &encap_limit);
break;
case htons(ETH_P_IPV6):
- if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr))
goto tx_err;
if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
&dsfield, &encap_limit))
@@ -1047,8 +1053,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
/* Push GRE header. */
- gre_build_header(skb, 8, TUNNEL_SEQ,
- htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+ proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
+ : htons(ETH_P_ERSPAN2);
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1181,6 +1188,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
t->parms.fwmark = p->fwmark;
+ t->parms.erspan_ver = p->erspan_ver;
+ t->parms.index = p->index;
+ t->parms.dir = p->dir;
+ t->parms.hwid = p->hwid;
dst_cache_reset(&t->dst_cache);
}
@@ -1729,6 +1740,27 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+static void ip6erspan_set_version(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ if (!data)
+ return;
+
+ parms->erspan_ver = 1;
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
+}
+
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
@@ -1777,20 +1809,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -2043,9 +2061,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
struct __ip6_tnl_parm p;
- struct ip6_tnl *t;
t = ip6gre_changelink_common(dev, tb, data, &p, extack);
if (IS_ERR(t))
@@ -2114,12 +2132,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+ if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
+ !p->collect_md)
+ o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
@@ -2214,6 +2237,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
int err;
ip6gre_netlink_parms(data, &nt->parms);
+ ip6erspan_set_version(data, &nt->parms);
ign = net_generic(net, ip6gre_net_id);
if (nt->parms.collect_md) {
@@ -2259,6 +2283,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6erspan_set_version(data, &p);
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2694def1e72c..eed9231c90ad 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,6 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+ skb->tstamp = 0;
return dst_output(net, sk, skb);
}
@@ -586,7 +587,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
+ unsigned int mtu, hlen, left, len, nexthdr_offset;
int hroom, troom;
__be32 frag_id;
int ptr, offset = 0, err = 0;
@@ -597,6 +598,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
goto fail;
hlen = err;
nexthdr = *prevhdr;
+ nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
@@ -631,6 +633,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
(err = skb_checksum_help(skb)))
goto fail;
+ prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a9d06d4dd057..ade1390c6348 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
@@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else {
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -901,6 +901,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
+ ipv6h = ipv6_hdr(skb);
if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
goto drop;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
@@ -1242,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- /* ensure we can access the full inner ip header */
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return -1;
-
iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1320,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- return -1;
-
ipv6h = ipv6_hdr(skb);
tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
@@ -1404,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index b283f293ee4a..caad40d6e74d 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -15,7 +15,7 @@
int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
- struct sockaddr_in6 udp6_addr;
+ struct sockaddr_in6 udp6_addr = {};
int err;
struct socket *sock = NULL;
@@ -42,6 +42,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->peer_udp_port) {
+ memset(&udp6_addr, 0, sizeof(udp6_addr));
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
sizeof(udp6_addr.sin6_addr));
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index eeaf7455d51e..8b6eefff2f7e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -318,6 +318,7 @@ static int vti6_rcv(struct sk_buff *skb)
return 0;
}
+ ipv6h = ipv6_hdr(skb);
if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
t->dev->stats.rx_dropped++;
rcu_read_unlock();
@@ -521,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct ipv6hdr *ipv6h;
struct flowi fl;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ipv6h = ipv6_hdr(skb);
-
if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- vti6_addr_conflict(t, ipv6h))
+ vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
xfrm_decode_session(skb, &fl, AF_INET6);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d0b7e0249c13..35e7092eceb3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -51,6 +51,9 @@
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
+#include <net/ip_tunnels.h>
+
+#include <linux/nospec.h>
struct ip6mr_rule {
struct fib_rule common;
@@ -591,13 +594,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
- int err;
- err = ip6mr_fib_lookup(net, &fl6, &mrt);
- if (err < 0) {
- kfree_skb(skb);
- return err;
- }
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+ goto tx_err;
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -606,6 +608,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
read_unlock(&mrt_lock);
kfree_skb(skb);
return NETDEV_TX_OK;
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int reg_vif_get_iflink(const struct net_device *dev)
@@ -1499,6 +1506,9 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
list_del_rcu(&c->list);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c, mrt->id);
mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
mr_cache_put(c);
}
@@ -1507,10 +1517,6 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
- FIB_EVENT_ENTRY_DEL,
- (struct mfc6_cache *)c,
- mrt->id);
mr6_netlink_event(mrt, (struct mfc6_cache *)c,
RTM_DELROUTE);
ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
@@ -1831,6 +1837,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
return -EFAULT;
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
+ vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -1905,6 +1912,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return -EFAULT;
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
+ vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -1946,10 +1954,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8b075f0bc351..6d0b1f3e927b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
struct sock *sk = sk_to_full_sk(skb->sk);
unsigned int hh_len;
struct dst_entry *dst;
+ int strict = (ipv6_addr_type(&iph->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
+ strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 1059894a6f4c..4cb83fb69844 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left + 1) * sizeof(struct in6_addr));
psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+ if (!psid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
&srhinfo->psid_addr)))
@@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left - 1) * sizeof(struct in6_addr));
nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+ if (!nsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
&srhinfo->nsid_addr)))
@@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (srhinfo->mt_flags & IP6T_SRH_LSID) {
lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+ if (!lsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
&srhinfo->lsid_addr)))
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 043ed8eb0ab9..cb1b4772dac0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#endif
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
- struct sk_buff *prev, *next;
unsigned int payload_len;
- int offset, end;
+ struct net_device *dev;
+ struct sk_buff *prev;
+ int offset, end, err;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE) {
@@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
goto err;
}
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || prev->ip_defrag_offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (next->ip_defrag_offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (prev->ip_defrag_offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && next->ip_defrag_offset < end)
- goto discard_fq;
-
- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
- if (skb->dev)
- fq->iif = skb->dev->ifindex;
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- skb->ip_defrag_offset = offset;
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
@@ -319,11 +286,25 @@ found:
fq->q.flags |= INET_FRAG_FIRST_IN;
}
- return 0;
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
-discard_fq:
+insert_error:
+ if (err == IPFRAG_DUP)
+ goto err;
inet_frag_kill(&fq->q);
err:
+ skb_dst_drop(skb);
return -EINVAL;
}
@@ -333,147 +314,67 @@ err:
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
- *
- * returns true if *prev skb has been transformed into the reassembled
- * skb, false otherwise.
*/
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len, delta;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
- WARN_ON(head == NULL);
- WARN_ON(head->ip_defrag_offset != 0);
-
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- return false;
+ goto err;
+
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto err;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
- return false;
- }
-
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- return false;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(fq->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (clone == NULL)
- return false;
-
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- /* morph head into last received skb: prev.
- *
- * This allows callers of ipv6 conntrack defrag to continue
- * to use the last skb(frag) passed into the reasm engine.
- * The last skb frag 'silently' turns into the full reassembled skb.
- *
- * Since prev is also part of q->fragments we have to clone it first.
- */
- if (head != prev) {
- struct sk_buff *iter;
-
- fp = skb_clone(prev, GFP_ATOMIC);
- if (!fp)
- return false;
-
- fp->next = prev->next;
-
- iter = head;
- while (iter) {
- if (iter->next == prev) {
- iter->next = fp;
- break;
- }
- iter = iter->next;
- }
-
- skb_morph(prev, head);
- prev->next = head->next;
- consume_skb(head);
- head = prev;
+ goto err;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_shinfo(head)->frag_list = head->next;
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp = head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp->sk = NULL;
- }
- sub_frag_mem_limit(fq->q.net, head->truesize);
+ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- head->ignore_df = 1;
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+ skb->ignore_df = 1;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_partial(skb_network_header(skb),
+ skb_network_header_len(skb),
+ skb->csum);
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
- return true;
+ return 0;
+
+err:
+ inet_frag_kill(&fq->q);
+ return -EINVAL;
}
/*
@@ -542,7 +443,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
u16 savethdr = skb->transport_header;
- struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
@@ -565,10 +465,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- return -EINVAL;
-
skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
@@ -580,31 +476,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
- if (ret < 0) {
- if (ret == -EPROTO) {
- skb->transport_header = savethdr;
- ret = 0;
- }
- goto out_unlock;
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
- ret = -EINPROGRESS;
- if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len) {
- unsigned long orefdst = skb->_skb_refdst;
-
- skb->_skb_refdst = 0UL;
- if (nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
- skb->_skb_refdst = orefdst;
- } else {
- skb_dst_drop(skb);
- }
+ if (ret)
+ ret = -EINPROGRESS;
-out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4fe7c90962dd..868ae23dbae1 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -10,15 +10,25 @@
#include <net/secure_seq.h>
#include <linux/netfilter.h>
-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
+ const struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ .src = *src,
+ };
u32 hash, id;
- hash = __ipv6_addr_jhash(dst, hashrnd);
- hash = __ipv6_addr_jhash(src, hash);
- hash ^= net_hash_mix(net);
+ /* Note the following code is not safe, but this is okay. */
+ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+ get_random_bytes(&net->ipv4.ip_id_key,
+ sizeof(net->ipv4.ip_id_key));
+
+ hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
* set the hight order instead thus minimizing possible future
@@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
*/
__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
struct in6_addr buf[2];
struct in6_addr *addrs;
u32 id;
@@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
if (!addrs)
return 0;
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
+ id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
return htonl(id);
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
@@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
- static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ id = __ipv6_select_ident(net, daddr, saddr);
return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5e0efd3954e9..4856d9320b28 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -288,7 +288,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if)
goto out_unlock;
+ }
+ if (sk->sk_bound_dev_if) {
err = -ENODEV;
dev = dev_get_by_index_rcu(sock_net(sk),
sk->sk_bound_dev_if);
@@ -780,6 +782,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct flowi6 fl6;
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
+ int hdrincl;
u16 proto;
int err;
@@ -793,6 +796,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
+
/*
* Get and verify the address.
*/
@@ -884,11 +894,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = ipv6_fixup_options(&opt_space, opt);
fl6.flowi6_proto = proto;
- rfv.msg = msg;
- rfv.hlen = 0;
- err = rawv6_probe_proto_opt(&rfv, &fl6);
- if (err)
- goto out;
+
+ if (!hdrincl) {
+ rfv.msg = msg;
+ rfv.hlen = 0;
+ err = rawv6_probe_proto_opt(&rfv, &fl6);
+ if (err)
+ goto out;
+ }
if (!ipv6_addr_any(daddr))
fl6.daddr = *daddr;
@@ -905,7 +918,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- if (inet->hdrincl)
+ if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
if (ipc6.tclass < 0)
@@ -928,7 +941,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
msg->msg_flags, &ipc6.sockc);
else {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index d3fd2d7e5aa4..095825f964e2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static struct inet_frags ip6_frags;
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
static void ip6_frag_expire(struct timer_list *t)
{
@@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff,
u32 *prob_offset)
{
- struct sk_buff *prev, *next;
- struct net_device *dev;
- int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
+ int offset, end, fragsize;
+ struct sk_buff *prev_tail;
+ struct net_device *dev;
+ int err = -ENOENT;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
+ err = -EINVAL;
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+ /* note that if prob_offset is set, the skb is freed elsewhere,
+ * we do not free it here.
+ */
return -1;
}
@@ -145,7 +150,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
*/
if (end < fq->q.len ||
((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
- goto err;
+ goto discard_fq;
fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
@@ -162,70 +167,35 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
if (fq->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_fq;
fq->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_fq;
+ err = -ENOMEM;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
- goto err;
-
- if (pskb_trim_rcsum(skb, end - offset))
- goto err;
-
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || prev->ip_defrag_offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (next->ip_defrag_offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
- /* Look for overlap with succeeding segment. */
- if (next && next->ip_defrag_offset < end)
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto discard_fq;
- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ /* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
- if (dev)
- fq->iif = dev->ifindex;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- skb->ip_defrag_offset = offset;
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev_tail = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
@@ -246,44 +216,48 @@ found:
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
- int res;
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = ip6_frag_reasm(fq, prev, dev);
+ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
- return res;
+ return err;
}
skb_dst_drop(skb);
- return -1;
+ return -EINPROGRESS;
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
inet_frag_kill(&fq->q);
-err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
+err:
kfree_skb(skb);
- return -1;
+ return err;
}
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len, delta;
unsigned int nhoff;
- int sum_truesize;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -292,120 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (unlikely(ecn == 0xff))
goto out_fail;
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
-
- if (!fp)
- goto out_oom;
-
- fp->next = head->next;
- if (!fp->next)
- fq->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, fq->q.fragments);
- head->next = fq->q.fragments->next;
-
- consume_skb(fq->q.fragments);
- fq->q.fragments = head;
- }
-
- WARN_ON(head == NULL);
- WARN_ON(head->ip_defrag_offset != 0);
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_oom;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_oom;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(fq->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_oom;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- if (skb_mac_header_was_set(head))
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
- }
- sub_frag_mem_limit(fq->q.net, sum_truesize);
+ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->nhoff = nhoff;
- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
- IP6CB(head)->frag_max_size = fq->q.max_size;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->nhoff = nhoff;
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- skb_postpush_rcsum(head, skb_network_header(head),
- skb_network_header_len(head));
+ skb_postpush_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -413,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
return 1;
out_oversize:
@@ -424,6 +319,7 @@ out_fail:
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
+ inet_frag_kill(&fq->q);
return -1;
}
@@ -462,10 +358,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- goto fail_hdr;
-
iif = skb->dev ? skb->dev->ifindex : 0;
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
@@ -483,6 +375,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (prob_offset) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
+ /* icmpv6_param_prob() calls kfree_skb(skb) */
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
}
return ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a33681dc4796..24f7b2cf504b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -110,8 +110,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
- struct in6_addr *daddr,
- struct in6_addr *saddr);
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
@@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
n = __ipv6_neigh_lookup(dev, daddr);
if (n)
return n;
- return neigh_create(&nd_tbl, daddr, dev);
+
+ n = neigh_create(&nd_tbl, daddr, dev);
+ return IS_ERR(n) ? NULL : n;
}
static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
@@ -380,11 +382,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- rcu_read_lock();
- from = rcu_dereference(rt->from);
- rcu_assign_pointer(rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt->from, NULL);
fib6_info_release(from);
- rcu_read_unlock();
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1046,14 +1045,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
struct rt6_info *nrt;
if (!fib6_info_hold_safe(rt))
- return NULL;
+ goto fallback;
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
- if (nrt)
- ip6_rt_copy_init(nrt, rt);
- else
+ if (!nrt) {
fib6_info_release(rt);
+ goto fallback;
+ }
+
+ ip6_rt_copy_init(nrt, rt);
+ return nrt;
+fallback:
+ nrt = dev_net(dev)->ipv6.ip6_null_entry;
+ dst_hold(&nrt->dst);
return nrt;
}
@@ -1102,10 +1107,6 @@ restart:
dst_hold(&rt->dst);
} else {
rt = ip6_create_rt_rcu(f6i);
- if (!rt) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
}
rcu_read_unlock();
@@ -1267,6 +1268,13 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
+ if (rt->fib6_destroying) {
+ struct fib6_info *from;
+
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ fib6_info_release(from);
+ }
+
return pcpu_rt;
}
@@ -1280,18 +1288,27 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
+ struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
return;
net = dev_net(rt6_ex->rt6i->dst.dev);
+ net->ipv6.rt6_stats->fib_rt_cache--;
+
+ /* purge completely the exception to allow releasing the held resources:
+ * some [sk] cache may keep the dst around for unlimited time
+ */
+ from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
+ fib6_info_release(from);
+ dst_dev_put(&rt6_ex->rt6i->dst);
+
hlist_del_rcu(&rt6_ex->hlist);
dst_release(&rt6_ex->rt6i->dst);
kfree_rcu(rt6_ex, rcu);
WARN_ON_ONCE(!bucket->depth);
bucket->depth--;
- net->ipv6.rt6_stats->fib_rt_cache--;
}
/* Remove oldest rt6_ex in bucket and free the memory
@@ -1532,31 +1549,44 @@ out:
* Caller has to hold rcu_read_lock()
*/
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
- struct in6_addr *daddr,
- struct in6_addr *saddr)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct rt6_info *res = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
-
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates rt is in subtree
* and exception table is indexed by a hash of
* both rt6i_dst and rt6i_src.
- * Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * However, the src addr used to create the hash
+ * might not be exactly the passed in saddr which
+ * is a /128 addr from the flow.
+ * So we need to use f6i->fib6_src to redo lookup
+ * if the passed in saddr does not find anything.
+ * (See the logic in ip6_rt_cache_alloc() on how
+ * rt->rt6i_src is updated.)
*/
if (rt->fib6_src.plen)
src_key = saddr;
+find_ex:
#endif
+ bucket = rcu_dereference(rt->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
res = rt6_ex->rt6i;
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Use fib6_src as src_key and redo lookup */
+ if (!res && src_key && src_key != &rt->fib6_src.addr) {
+ src_key = &rt->fib6_src.addr;
+ goto find_ex;
+ }
+#endif
+
return res;
}
@@ -1610,15 +1640,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct fib6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
-
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return;
+ struct fib6_info *from;
rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
bucket = rcu_dereference(from->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
@@ -1637,6 +1667,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (rt6_ex)
rt6_ex->stamp = jiffies;
+unlock:
rcu_read_unlock();
}
@@ -2351,6 +2382,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_lock();
from = rcu_dereference(rt6->from);
+ if (!from) {
+ rcu_read_unlock();
+ return;
+ }
nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
@@ -2445,6 +2480,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_info *rt;
struct fib6_node *fn;
+ /* l3mdev_update_flow overrides oif if the device is enslaved; in
+ * this case we must match on the real ingress device, so reset it
+ */
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ fl6->flowi6_oif = skb->dev->ifindex;
+
/* Get the "current" route for this destination and
* check if the redirect has come from appropriate router.
*
@@ -2635,10 +2676,8 @@ out:
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
- struct rt6_exception_bucket *bucket;
- struct rt6_exception *rt6_ex;
- struct in6_addr *src_key;
struct inet6_dev *idev;
+ struct rt6_info *rt;
u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
@@ -2647,18 +2686,10 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
goto out;
}
- src_key = NULL;
-#ifdef CONFIG_IPV6_SUBTREES
- if (f6i->fib6_src.plen)
- src_key = saddr;
-#endif
-
- bucket = rcu_dereference(f6i->rt6i_exception_bucket);
- rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
- if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
- mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
-
- if (likely(!mtu)) {
+ rt = rt6_find_cached_rt(f6i, daddr, saddr);
+ if (unlikely(rt)) {
+ mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
+ } else {
struct net_device *dev = fib6_info_nh_dev(f6i);
mtu = IPV6_MIN_MTU;
@@ -2794,20 +2825,24 @@ static int ip6_route_check_nh_onlink(struct net *net,
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
const struct in6_addr *gw_addr = &cfg->fc_gateway;
u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct fib6_info *from;
struct rt6_info *grt;
int err;
err = 0;
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
+ rcu_read_lock();
+ from = rcu_dereference(grt->from);
if (!grt->dst.error &&
/* ignore match if it is the default route */
- grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
+ from && !ipv6_addr_any(&from->fib6_dst.addr) &&
(grt->rt6i_flags & flags || dev != grt->dst.dev)) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway or device mismatch");
err = -EINVAL;
}
+ rcu_read_unlock();
ip6_rt_put(grt);
}
@@ -3430,11 +3465,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
from = rcu_dereference(rt->from);
- /* This fib6_info_hold() is safe here because we hold reference to rt
- * and rt already holds reference to fib6_info.
- */
- fib6_info_hold(from);
- rcu_read_unlock();
+ if (!from)
+ goto out;
nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
if (!nrt)
@@ -3446,10 +3478,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- /* No need to remove rt from the exception table if rt is
- * a cached route because rt6_insert_exception() will
- * takes care of it
- */
+ /* rt6_insert_exception() will take care of duplicated exceptions */
if (rt6_insert_exception(nrt, from)) {
dst_release_immediate(&nrt->dst);
goto out;
@@ -3462,7 +3491,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(from);
+ rcu_read_unlock();
neigh_release(neigh);
}
@@ -4211,6 +4240,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
}
+ if (tb[RTA_VIA]) {
+ NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
+ goto errout;
+ }
if (tb[RTA_DST]) {
int plen = (rtm->rtm_dst_len + 7) >> 3;
@@ -4704,7 +4737,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
table = rt->fib6_table->tb6_id;
else
table = RT6_TABLE_UNSPEC;
- rtm->rtm_table = table;
+ rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table))
goto nla_put_failure;
@@ -4905,7 +4938,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (tb[RTA_IP_PROTO]) {
err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
- &fl6.flowi6_proto, extack);
+ &fl6.flowi6_proto, AF_INET6,
+ extack);
if (err)
goto errout;
}
@@ -4962,16 +4996,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
from = rcu_dereference(rt->from);
-
- if (fibmatch)
- err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
- else
- err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
- &fl6.saddr, iif, RTM_NEWROUTE,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- 0);
+ if (from) {
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+ iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ &fl6.saddr, iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ } else {
+ err = -ENETUNREACH;
+ }
rcu_read_unlock();
if (err < 0) {
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 8d0ba757a46c..9b2f272ca164 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
rcu_read_unlock();
genlmsg_end(msg, hdr);
- genlmsg_reply(msg, info);
-
- return 0;
+ return genlmsg_reply(msg, info);
nla_put_failure:
rcu_read_unlock();
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 8181ee7e1e27..ee5403cbe655 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
} else {
ip6_flow_hdr(hdr, 0, flowlabel);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
}
hdr->nexthdr = NEXTHDR_ROUTING;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9400ffa7875..41b3fe8ac3bc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
}
err = 0;
- if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
+ if (__in6_dev_get(skb->dev) &&
+ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
goto out;
if (t->parms.iph.daddr == 0)
@@ -668,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb)
!net_eq(tunnel->net, dev_net(tunnel->dev))))
goto out;
+ /* skb can be uncloned in iptunnel_pull_header, so
+ * old iph is no longer valid
+ */
+ iph = (const struct iphdr *)skb_mac_header(skb);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -777,8 +782,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
pbw0 = tunnel->ip6rd.prefixlen >> 5;
pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
- d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
- tunnel->ip6rd.relay_prefixlen;
+ d = tunnel->ip6rd.relay_prefixlen < 32 ?
+ (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+ tunnel->ip6rd.relay_prefixlen : 0;
pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
if (pbi1 > 0)
@@ -1021,6 +1027,9 @@ tx_error:
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
@@ -1075,7 +1084,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (!tdev && tunnel->parms.link)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
- if (tdev) {
+ if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -1869,6 +1878,7 @@ static int __net_init sit_init_net(struct net *net)
err_reg_dev:
ipip6_dev_free(sitn->fb_tunnel_dev);
+ free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 03e6b7a2bc53..e7cdfa92c382 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1108,11 +1108,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = tcp_v6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
+ newnp->rcv_flowinfo = 0;
if (np->repflow)
- newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b36694b6716e..cab8b2b647f9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1056,15 +1056,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize)
+ if (hlen + cork->gso_size > cork->fragsize) {
+ kfree_skb(skb);
return -EINVAL;
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ }
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
return -EINVAL;
- if (udp_sk(sk)->no_check6_tx)
+ }
+ if (udp_sk(sk)->no_check6_tx) {
+ kfree_skb(skb);
return -EINVAL;
+ }
if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb)))
+ dst_xfrm(skb_dst(skb))) {
+ kfree_skb(skb);
return -EIO;
+ }
skb_shinfo(skb)->gso_size = cork->gso_size;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
@@ -1314,10 +1322,7 @@ do_udp_sendmsg:
ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
- if (!ipv6_addr_any(daddr))
- fl6.daddr = *daddr;
- else
- fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ fl6.daddr = *daddr;
if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
@@ -1345,6 +1350,9 @@ do_udp_sendmsg:
}
}
+ if (ipv6_addr_any(&fl6.daddr))
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+
final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
connected = false;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4a46df8441c9..d9e5f6808811 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -144,6 +144,9 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
index = __xfrm6_tunnel_spi_check(net, spi);
if (index >= 0)
goto alloc_spi;
+
+ if (spi == XFRM6_TUNNEL_SPI_MAX)
+ break;
}
for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) {
index = __xfrm6_tunnel_spi_check(net, spi);
@@ -341,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
+ xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
@@ -399,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void)
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+ /* Someone maybe has gotten the xfrm6_tunnel_spi.
+ * So need to wait it.
+ */
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 571d824e4e24..b919db02c7f9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2054,14 +2054,14 @@ static int __init kcm_init(void)
if (err)
goto fail;
- err = sock_register(&kcm_family_ops);
- if (err)
- goto sock_register_fail;
-
err = register_pernet_device(&kcm_net_ops);
if (err)
goto net_ops_fail;
+ err = sock_register(&kcm_family_ops);
+ if (err)
+ goto sock_register_fail;
+
err = kcm_proc_init();
if (err)
goto proc_init_fail;
@@ -2069,12 +2069,12 @@ static int __init kcm_init(void)
return 0;
proc_init_fail:
- unregister_pernet_device(&kcm_net_ops);
-
-net_ops_fail:
sock_unregister(PF_KCM);
sock_register_fail:
+ unregister_pernet_device(&kcm_net_ops);
+
+net_ops_fail:
proto_unregister(&kcm_proto);
fail:
@@ -2090,8 +2090,8 @@ fail:
static void __exit kcm_exit(void)
{
kcm_proc_exit();
- unregister_pernet_device(&kcm_net_ops);
sock_unregister(PF_KCM);
+ unregister_pernet_device(&kcm_net_ops);
proto_unregister(&kcm_proto);
destroy_workqueue(kcm_wq);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d61266526e7..0b79c9aa8eb1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock)
return 0;
}
-static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
- gfp_t allocation, struct sock *sk)
+static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation,
+ struct sock *sk)
{
int err = -ENOBUFS;
- sock_hold(sk);
- if (*skb2 == NULL) {
- if (refcount_read(&skb->users) != 1) {
- *skb2 = skb_clone(skb, allocation);
- } else {
- *skb2 = skb;
- refcount_inc(&skb->users);
- }
- }
- if (*skb2 != NULL) {
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
- skb_set_owner_r(*skb2, sk);
- skb_queue_tail(&sk->sk_receive_queue, *skb2);
- sk->sk_data_ready(sk);
- *skb2 = NULL;
- err = 0;
- }
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ return err;
+
+ skb = skb_clone(skb, allocation);
+
+ if (skb) {
+ skb_set_owner_r(skb, sk);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk);
+ err = 0;
}
- sock_put(sk);
return err;
}
@@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
- struct sk_buff *skb2 = NULL;
int err = -ESRCH;
/* XXX Do we need something like netlink_overrun? I think
@@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
* socket.
*/
if (pfk->promisc)
- pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+ pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
/* the exact target will be processed later */
if (sk == one_sk)
@@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
continue;
}
- err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+ err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
/* Error is cleared after successful sending to at least one
* registered KM */
@@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+ err = pfkey_broadcast_one(skb, allocation, one_sk);
- kfree_skb(skb2);
kfree_skb(skb);
return err;
}
@@ -1783,7 +1773,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
if (proto == 0)
return -EINVAL;
- err = xfrm_state_flush(net, proto, true);
+ err = xfrm_state_flush(net, proto, true, false);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
@@ -1961,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
if (rq->sadb_x_ipsecrequest_mode == 0)
return -EINVAL;
+ if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto))
+ return -EINVAL;
- t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+ t->id.proto = rq->sadb_x_ipsecrequest_proto;
if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
return -EINVAL;
t->mode = mode;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 26f1d435696a..52b5a2797c0c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -83,8 +83,7 @@
#define L2TP_SLFLAG_S 0x40000000
#define L2TP_SL_SEQ_MASK 0x00ffffff
-#define L2TP_HDR_SIZE_SEQ 10
-#define L2TP_HDR_SIZE_NOSEQ 6
+#define L2TP_HDR_SIZE_MAX 14
/* Default trace flags */
#define L2TP_DEFAULT_DEBUG_FLAGS 0
@@ -170,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (tunnel->tunnel_id == tunnel_id &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -191,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (++count > nth &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
__skb_pull(skb, sizeof(struct udphdr));
/* Short packet? */
- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
l2tp_info(tunnel, L2TP_MSG_DATA,
"%s: recv short packet (len=%d)\n",
tunnel->name, skb->len);
@@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
goto error;
}
+ if (tunnel->version == L2TP_HDR_VER_3 &&
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto error;
+
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
@@ -906,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- tunnel = l2tp_tunnel(sk);
+ tunnel = rcu_dereference_sk_user_data(sk);
if (tunnel == NULL)
goto pass_up;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9c9afe94d389..b2ce90260c35 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel)
}
#endif
+static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
+ unsigned char **ptr, unsigned char **optr)
+{
+ int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
+
+ if (opt_len > 0) {
+ int off = *ptr - *optr;
+
+ if (!pskb_may_pull(skb, off + opt_len))
+ return -1;
+
+ if (skb->data != *optr) {
+ *optr = skb->data;
+ *ptr = skb->data + off;
+ }
+ }
+
+ return 0;
+}
+
#define l2tp_printk(ptr, type, func, fmt, ...) \
do { \
if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 35f6f86d4dcc..d4c60523c549 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
+ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto discard_sess;
+
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
l2tp_session_dec_refcount(session);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 237f1a4a0b0c..37a69df17cab 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
+ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ goto discard_sess;
+
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
l2tp_session_dec_refcount(session);
@@ -671,9 +674,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*lsa);
-
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
@@ -703,6 +703,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
lsa->l2tp_conn_id = 0;
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = inet6_iif(skb);
+ *addr_len = sizeof(*lsa);
}
if (np->rxopt.all)
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index 94425e421213..9e4b6bcf6920 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -72,6 +72,8 @@ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac);
if (likely(!rc))
rc = dev_queue_xmit(skb);
+ else
+ kfree_skb(skb);
return rc;
}
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 69e831bc317b..54821fb1a960 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
+ ieee80211_agg_stop_txq(sta, tid);
+
spin_unlock_bh(&sta->lock);
ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n",
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5d22eda8a6b1..40c510223467 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -887,6 +887,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_P2P_PS |
BSS_CHANGED_TXPOWER;
int err;
+ int prev_beacon_int;
old = sdata_dereference(sdata->u.ap.beacon, sdata);
if (old)
@@ -909,6 +910,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->needed_rx_chains = sdata->local->rx_chains;
+ prev_beacon_int = sdata->vif.bss_conf.beacon_int;
sdata->vif.bss_conf.beacon_int = params->beacon_interval;
mutex_lock(&local->mtx);
@@ -917,8 +919,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (!err)
ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
mutex_unlock(&local->mtx);
- if (err)
+ if (err) {
+ sdata->vif.bss_conf.beacon_int = prev_beacon_int;
return err;
+ }
/*
* Apply control port protocol, this allows us to
@@ -1474,6 +1478,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
sta->sta.tdls = true;
+ if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION &&
+ !sdata->u.mgd.associated)
+ return -EINVAL;
+
err = sta_apply_parameters(local, sta, params);
if (err) {
sta_info_free(local, sta);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c813207bb123..d37d4acafebf 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -838,7 +838,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
dir = sdata->vif.debugfs_dir;
- if (!dir)
+ if (IS_ERR_OR_NULL(dir))
return;
sprintf(buf, "netdev:%s", sdata->name);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 8f6998091d26..2123f6e90fc0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1166,6 +1166,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
+ if (local->in_reconfig)
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5f3c81e705c7..152d4365f961 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -7,6 +7,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1909,6 +1910,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
+ if (sdata->vif.txq)
+ ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+
synchronize_rcu();
if (sdata->dev) {
@@ -1951,6 +1955,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
WARN(local->open_count, "%s: open count remains %d\n",
wiphy_name(local->hw.wiphy), local->open_count);
+ ieee80211_txq_teardown_flows(local);
+
mutex_lock(&local->iflist_mtx);
list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
list_del(&sdata->list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index c054ac85793c..f20bb39f492d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
* The driver doesn't know anything about VLAN interfaces.
* Hence, don't send GTKs for VLAN interfaces to the driver.
*/
- if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ ret = 1;
goto out_unsupported;
+ }
}
ret = drv_set_key(key->local, SET_KEY, sdata,
@@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
/* all of these we can do in software - if driver can */
if (ret == 1)
return 0;
- if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- return 0;
+ if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
return -EINVAL;
- }
return 0;
default:
return -EINVAL;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 513627896204..68db2a356443 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1198,7 +1198,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
rtnl_unlock();
ieee80211_led_exit(local);
ieee80211_wep_free(local);
- ieee80211_txq_teardown_flows(local);
fail_flows:
destroy_workqueue(local->workqueue);
fail_workqueue:
@@ -1224,7 +1223,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&local->ifa6_notifier);
#endif
- ieee80211_txq_teardown_flows(local);
rtnl_lock();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 21526630bf65..e84103b40534 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -70,6 +70,7 @@ enum mesh_deferred_task_flags {
* @dst: mesh path destination mac address
* @mpp: mesh proxy mac address
* @rhash: rhashtable list pointer
+ * @walk_list: linked list containing all mesh_path objects.
* @gate_list: list pointer for known gates list
* @sdata: mesh subif
* @next_hop: mesh neighbor to which frames for this destination will be
@@ -105,6 +106,7 @@ struct mesh_path {
u8 dst[ETH_ALEN];
u8 mpp[ETH_ALEN]; /* used for MPP or MAP */
struct rhash_head rhash;
+ struct hlist_node walk_list;
struct hlist_node gate_list;
struct ieee80211_sub_if_data *sdata;
struct sta_info __rcu *next_hop;
@@ -133,12 +135,16 @@ struct mesh_path {
* gate's mpath may or may not be resolved and active.
* @gates_lock: protects updates to known_gates
* @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+ * @walk_head: linked list containging all mesh_path objects
+ * @walk_lock: lock protecting walk_head
* @entries: number of entries in the table
*/
struct mesh_table {
struct hlist_head known_gates;
spinlock_t gates_lock;
struct rhashtable rhead;
+ struct hlist_head walk_head;
+ spinlock_t walk_lock;
atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
};
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a5125624a76d..49a90217622b 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath);
static u32 mesh_table_hash(const void *addr, u32 len, u32 seed)
{
/* Use last four bytes of hw addr as hash index */
- return jhash_1word(*(u32 *)(addr+2), seed);
+ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed);
}
static const struct rhashtable_params mesh_rht_params = {
@@ -59,8 +59,10 @@ static struct mesh_table *mesh_table_alloc(void)
return NULL;
INIT_HLIST_HEAD(&newtbl->known_gates);
+ INIT_HLIST_HEAD(&newtbl->walk_head);
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
+ spin_lock_init(&newtbl->walk_lock);
return newtbl;
}
@@ -249,28 +251,15 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
static struct mesh_path *
__mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
{
- int i = 0, ret;
- struct mesh_path *mpath = NULL;
- struct rhashtable_iter iter;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return NULL;
-
- rhashtable_walk_start(&iter);
+ int i = 0;
+ struct mesh_path *mpath;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (i++ == idx)
break;
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
- if (IS_ERR(mpath) || !mpath)
+ if (!mpath)
return NULL;
if (mpath_expired(mpath)) {
@@ -432,6 +421,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
return ERR_PTR(-ENOMEM);
tbl = sdata->u.mesh.mesh_paths;
+ spin_lock_bh(&tbl->walk_lock);
do {
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
@@ -441,20 +431,20 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
mpath = rhashtable_lookup_fast(&tbl->rhead,
dst,
mesh_rht_params);
-
+ else if (!ret)
+ hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
} while (unlikely(ret == -EEXIST && !mpath));
+ spin_unlock_bh(&tbl->walk_lock);
- if (ret && ret != -EEXIST)
- return ERR_PTR(ret);
-
- /* At this point either new_mpath was added, or we found a
- * matching entry already in the table; in the latter case
- * free the unnecessary new entry.
- */
- if (ret == -EEXIST) {
+ if (ret) {
kfree(new_mpath);
+
+ if (ret != -EEXIST)
+ return ERR_PTR(ret);
+
new_mpath = mpath;
}
+
sdata->u.mesh.mesh_paths_generation++;
return new_mpath;
}
@@ -480,9 +470,17 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
memcpy(new_mpath->mpp, mpp, ETH_ALEN);
tbl = sdata->u.mesh.mpp_paths;
+
+ spin_lock_bh(&tbl->walk_lock);
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
mesh_rht_params);
+ if (!ret)
+ hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head);
+ spin_unlock_bh(&tbl->walk_lock);
+
+ if (ret)
+ kfree(new_mpath);
sdata->u.mesh.mpp_paths_generation++;
return ret;
@@ -503,20 +501,9 @@ void mesh_plink_broken(struct sta_info *sta)
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta &&
mpath->flags & MESH_PATH_ACTIVE &&
!(mpath->flags & MESH_PATH_FIXED)) {
@@ -530,8 +517,7 @@ void mesh_plink_broken(struct sta_info *sta)
WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
}
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ rcu_read_unlock();
}
static void mesh_path_free_rcu(struct mesh_table *tbl,
@@ -551,6 +537,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
{
+ hlist_del_rcu(&mpath->walk_list);
rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params);
mesh_path_free_rcu(tbl, mpath);
}
@@ -571,27 +558,14 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta)
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
@@ -599,51 +573,26 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
{
struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (ether_addr_equal(mpath->mpp, proxy))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void table_flush_by_iface(struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
+ struct hlist_node *n;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
/**
@@ -675,7 +624,7 @@ static int table_path_del(struct mesh_table *tbl,
{
struct mesh_path *mpath;
- rcu_read_lock();
+ spin_lock_bh(&tbl->walk_lock);
mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params);
if (!mpath) {
rcu_read_unlock();
@@ -683,7 +632,7 @@ static int table_path_del(struct mesh_table *tbl,
}
__mesh_path_del(tbl, mpath);
- rcu_read_unlock();
+ spin_unlock_bh(&tbl->walk_lock);
return 0;
}
@@ -854,28 +803,16 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
+ struct hlist_node *n;
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if ((!(mpath->flags & MESH_PATH_RESOLVING)) &&
(!(mpath->flags & MESH_PATH_FIXED)) &&
time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3dbecae4be73..2ac749c4a6b2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1156,9 +1156,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
- /* XXX: shouldn't really modify cfg80211-owned data! */
- ifmgd->associated->channel = sdata->csa_chandef.chan;
-
ifmgd->csa_waiting_bcn = true;
ieee80211_sta_reset_beacon_monitor(sdata);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5e2b4a41acf1..e946ee4f335b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -142,6 +142,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
/* allocate extra bitmaps */
if (status->chains)
len += 4 * hweight8(status->chains);
+ /* vendor presence bitmap */
+ if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)
+ len += 4;
if (ieee80211_have_rx_timestamp(status)) {
len = ALIGN(len, 8);
@@ -197,8 +200,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
struct ieee80211_vendor_radiotap *rtap = (void *)skb->data;
- /* vendor presence bitmap */
- len += 4;
/* alignment for fixed 6-byte vendor data header */
len = ALIGN(len, 2);
/* vendor data header */
@@ -220,7 +221,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr_3addr hdr;
u8 category;
u8 action_code;
- } __packed action;
+ } __packed __aligned(2) action;
if (!sdata)
return;
@@ -2597,6 +2598,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u16 ac, q, hdrlen;
+ int tailroom = 0;
hdr = (struct ieee80211_hdr *) skb->data;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2676,15 +2678,21 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
skb_set_queue_mapping(skb, q);
if (!--mesh_hdr->ttl) {
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl);
+ if (!is_multicast_ether_addr(hdr->addr1))
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh,
+ dropped_frames_ttl);
goto out;
}
if (!ifmsh->mshcfg.dot11MeshForwarding)
goto out;
+ if (sdata->crypto_tx_tailroom_needed_cnt)
+ tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
- sdata->encrypt_headroom, 0, GFP_ATOMIC);
+ sdata->encrypt_headroom,
+ tailroom, GFP_ATOMIC);
if (!fwd_skb)
goto out;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 7fa10d06cc51..534a604b75c2 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -556,6 +556,11 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
}
ieee80211_led_tx(local);
+
+ if (skb_has_frag_list(skb)) {
+ kfree_skb_list(skb_shinfo(skb)->frag_list);
+ skb_shinfo(skb)->frag_list = NULL;
+ }
}
/*
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 366b9e6f043e..40141df09f25 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,4 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright (C) 2019 Intel Corporation
+ */
+
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -11,7 +16,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 100
+#define MAX_MSG_LEN 120
DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_PROTO(struct va_format *vaf),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 995a491f73a9..2f726cde9998 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1913,9 +1913,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
int head_need, bool may_encrypt)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_hdr *hdr;
+ bool enc_tailroom;
int tail_need = 0;
- if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) {
+ hdr = (struct ieee80211_hdr *) skb->data;
+ enc_tailroom = may_encrypt &&
+ (sdata->crypto_tx_tailroom_needed_cnt ||
+ ieee80211_is_mgmt(hdr->frame_control));
+
+ if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
tail_need -= skb_tailroom(skb);
tail_need = max_t(int, tail_need, 0);
@@ -1923,8 +1930,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
if (skb_cloned(skb) &&
(!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
- !skb_clone_writable(skb, ETH_HLEN) ||
- (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
+ !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else if (head_need || tail_need)
I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -3179,6 +3185,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
u8 max_subframes = sta->sta.max_amsdu_subframes;
int max_frags = local->hw.max_tx_fragments;
int max_amsdu_len = sta->sta.max_amsdu_len;
+ int orig_truesize;
__be16 len;
void *data;
bool ret = false;
@@ -3212,6 +3219,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!head)
goto out;
+ orig_truesize = head->truesize;
orig_len = head->len;
if (skb->len + head->len > max_amsdu_len)
@@ -3266,6 +3274,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
*frag_tail = skb;
out_recalc:
+ fq->memory_usage += head->truesize - orig_truesize;
if (head->len != orig_len) {
flow->backlog += head->len - orig_len;
tin->backlog_bytes += head->len - orig_len;
@@ -3608,10 +3617,10 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
/* We need a bit of data queued to build aggregates properly, so
* instruct the TCP stack to allow more than a single ms of data
* to be queued in the stack. The value is a bit-shift of 1
- * second, so 8 is ~4ms of queued data. Only affects local TCP
+ * second, so 7 is ~8ms of queued data. Only affects local TCP
* sockets.
*/
- sk_pacing_shift_update(skb->sk, 8);
+ sk_pacing_shift_update(skb->sk, 7);
fast_tx = rcu_dereference(sta->fast_tx);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 716cd6442d86..3deaa01ebee4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -2020,6 +2020,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
break;
+ case NL80211_IFTYPE_ADHOC:
+ if (sdata->vif.bss_conf.ibss_joined)
+ WARN_ON(drv_join_ibss(local, sdata));
+ /* fall through */
default:
ieee80211_reconfig_stations(sdata);
/* fall through */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8fbe6cdbe255..d5a4db5b3fe7 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1822,6 +1822,9 @@ static int rtm_to_route_config(struct sk_buff *skb,
goto errout;
break;
}
+ case RTA_GATEWAY:
+ NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
+ goto errout;
case RTA_VIA:
{
if (nla_get_via(nla, &cfg->rc_via_alen,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f61c306de1d0..e0fb56d67d42 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1003,6 +1003,7 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
+ depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 9bd8f062ebc1..b364cf8e5776 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops);
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif
@@ -353,7 +353,7 @@ static int __nf_register_net_hook(struct net *net, int pf,
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
#endif
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
BUG_ON(p == new_hooks);
@@ -411,7 +411,7 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_dec_ingress_queue();
#endif
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
#endif
} else {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index c00b6a2e8e3c..13ade5782847 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_TWO_SRC))
- return 0;
-
ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
@@ -233,7 +229,11 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 1ab5ed2f6839..fd87de3ed55b 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -103,7 +103,11 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
if (ether_addr_equal(e.ether, invalid_ether))
return -EINVAL;
@@ -211,15 +215,15 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_TWO_SRC))
- return 0;
-
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
if (ether_addr_equal(e.ether, invalid_ether))
return -EINVAL;
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f9d5a2a1e3d0..4fe5f243d0a3 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_ONE_SRC))
- return 0;
-
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
if (is_zero_ether_addr(e.ether))
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 4eef55da0878..8da228da53ae 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -531,8 +531,8 @@ nla_put_failure:
ret = -EMSGSIZE;
} else {
cb->args[IPSET_CB_ARG0] = i;
+ ipset_nest_end(skb, atd);
}
- ipset_nest_end(skb, atd);
out:
rcu_read_unlock();
return ret;
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index cad48d07c818..8401cefd9f65 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -29,6 +29,7 @@ config IP_VS_IPV6
bool "IPv6 support for IPVS"
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6
---help---
Add IPv6 support to IPVS.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7ca926a03b81..a42c1bc7c698 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1536,14 +1536,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
"ip_vs_in: packet continues traversal as normal");
- if (iph->fragoffs) {
- /* Fragment that couldn't be mapped to a conn entry
- * is missing module nf_defrag_ipv6
- */
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+
+ /* Fragment couldn't be mapped to a conn entry */
+ if (iph->fragoffs)
IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
"unhandled fragment");
- }
+
*verdict = NF_ACCEPT;
return 0;
}
@@ -1649,7 +1647,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (!cp) {
int v;
- if (!sysctl_schedule_icmp(ipvs))
+ if (ipip || !sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 518364f4abcc..2d4e048762f6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -43,6 +43,7 @@
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
#include <net/route.h>
#include <net/sock.h>
@@ -900,11 +901,17 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
#ifdef CONFIG_IP_VS_IPV6
if (udest->af == AF_INET6) {
+ int ret;
+
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
return -EINVAL;
+
+ ret = nf_defrag_ipv6_enable(svc->ipvs->net);
+ if (ret)
+ return ret;
} else
#endif
{
@@ -1228,6 +1235,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ret = -EINVAL;
goto out_err;
}
+
+ ret = nf_defrag_ipv6_enable(ipvs->net);
+ if (ret)
+ goto out_err;
}
#endif
@@ -2221,6 +2232,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user
u->udp_timeout);
#ifdef CONFIG_IP_VS_PROTO_TCP
+ if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
+ u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
+ return -EINVAL;
+ }
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
+ return -EINVAL;
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index b6d0f6deea86..7554c56b2e63 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -33,12 +33,6 @@
#define CONNCOUNT_SLOTS 256U
-#ifdef CONFIG_LOCKDEP
-#define CONNCOUNT_LOCK_SLOTS 8U
-#else
-#define CONNCOUNT_LOCK_SLOTS 256U
-#endif
-
#define CONNCOUNT_GC_MAX_NODES 8
#define MAX_KEYLEN 5
@@ -49,8 +43,6 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
- bool dead;
- struct rcu_head rcu_head;
};
struct nf_conncount_rb {
@@ -60,7 +52,7 @@ struct nf_conncount_rb {
struct rcu_head rcu_head;
};
-static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
struct nf_conncount_data {
unsigned int keylen;
@@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- struct nf_conncount_tuple *conn;
-
- if (WARN_ON_ONCE(list->count > INT_MAX))
- return NF_CONNCOUNT_ERR;
-
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return NF_CONNCOUNT_ERR;
-
- conn->tuple = *tuple;
- conn->zone = *zone;
- conn->cpu = raw_smp_processor_id();
- conn->jiffies32 = (u32)jiffies;
- conn->dead = false;
- spin_lock_bh(&list->list_lock);
- if (list->dead == true) {
- kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_SKIP;
- }
- list_add_tail(&conn->node, &list->head);
- list->count++;
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_ADDED;
-}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
-
-static void __conn_free(struct rcu_head *h)
-{
- struct nf_conncount_tuple *conn;
-
- conn = container_of(h, struct nf_conncount_tuple, rcu_head);
- kmem_cache_free(conncount_conn_cachep, conn);
-}
-
-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn)
{
- bool free_entry = false;
-
- spin_lock_bh(&list->list_lock);
-
- if (conn->dead) {
- spin_unlock_bh(&list->list_lock);
- return free_entry;
- }
+ lockdep_assert_held(&list->list_lock);
list->count--;
- conn->dead = true;
- list_del_rcu(&conn->node);
- if (list->count == 0) {
- list->dead = true;
- free_entry = true;
- }
+ list_del(&conn->node);
- spin_unlock_bh(&list->list_lock);
- call_rcu(&conn->rcu_head, __conn_free);
- return free_entry;
+ kmem_cache_free(conncount_conn_cachep, conn);
}
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_list *list,
- struct nf_conncount_tuple *conn, bool *free_entry)
+ struct nf_conncount_tuple *conn)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
int cpu = raw_smp_processor_id();
- __s32 age;
+ u32 age;
found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
if (found)
@@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
- *free_entry = conn_free(list, conn);
+ conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
return ERR_PTR(-EAGAIN);
}
-void nf_conncount_lookup(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
+static int __nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collect = 0;
- bool free_entry = false;
-
- /* best effort only */
- *addit = tuple ? true : false;
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
- if (!tuple)
- continue;
-
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
- *addit = false;
- } else if (PTR_ERR(found) == -ENOENT)
+ return 0; /* already exists */
+ } else {
collect++;
+ }
continue;
}
found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* We should not see tuples twice unless someone hooks
@@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net,
*
* Attempt to avoid a re-add in this case.
*/
- *addit = false;
+ nf_ct_put(found_ct);
+ return 0;
} else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
@@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net,
nf_ct_put(found_ct);
}
+
+ if (WARN_ON_ONCE(list->count > INT_MAX))
+ return -EOVERFLOW;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return -ENOMEM;
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
+ list_add_tail(&conn->node, &list->head);
+ list->count++;
+ return 0;
}
-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+
+int nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ /* check the saved connections */
+ spin_lock_bh(&list->list_lock);
+ ret = __nf_conncount_add(net, list, tuple, zone);
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
- list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
-/* Return true if the list is empty */
+/* Return true if the list is empty. Must be called with BH disabled. */
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list)
{
@@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collected = 0;
- bool free_entry = false;
bool ret = false;
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock(&list->list_lock))
+ return false;
+
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
- if (PTR_ERR(found) == -ENOENT) {
- if (free_entry)
- return true;
+ if (PTR_ERR(found) == -ENOENT)
collected++;
- }
continue;
}
@@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
- if (conn_free(list, conn))
- return true;
+ conn_free(list, conn);
collected++;
continue;
}
nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES)
- return false;
+ break;
}
- spin_lock_bh(&list->list_lock);
- if (!list->count) {
- list->dead = true;
+ if (!list->count)
ret = true;
- }
- spin_unlock_bh(&list->list_lock);
+ spin_unlock(&list->list_lock);
return ret;
}
@@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
kmem_cache_free(conncount_rb_cachep, rbconn);
}
+/* caller must hold tree nf_conncount_locks[] lock */
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
@@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ if (!rbconn->list.count) {
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ }
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -341,20 +301,19 @@ insert_tree(struct net *net,
struct rb_root *root,
unsigned int hash,
const u32 *key,
- u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
- bool node_found = false;
-
- spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ u8 keylen = data->keylen;
+ bool do_gc = true;
+ spin_lock_bh(&nf_conncount_locks[hash]);
+restart:
parent = NULL;
rbnode = &(root->rb_node);
while (*rbnode) {
@@ -368,45 +327,32 @@ insert_tree(struct net *net,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
- /* unlikely: other cpu added node already */
- node_found = true;
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
+ int ret;
+
+ ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+ if (ret)
count = 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
+ else
count = rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
- node_found = false;
- }
- break;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ goto out_unlock;
}
if (gc_count >= ARRAY_SIZE(gc_nodes))
continue;
- if (nf_conncount_gc_list(net, &rbconn->list))
+ if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
if (gc_count) {
tree_nodes_free(root, gc_nodes, gc_count);
- /* tree_node_free before new allocation permits
- * allocator to re-use newly free'd object.
- *
- * This is a rare event; in most cases we will find
- * existing node to re-use. (or gc_count is 0).
- */
-
- if (gc_count >= ARRAY_SIZE(gc_nodes))
- schedule_gc_worker(data, hash);
+ schedule_gc_worker(data, hash);
+ gc_count = 0;
+ do_gc = false;
+ goto restart;
}
- if (node_found)
- goto out_unlock;
-
/* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
@@ -427,10 +373,10 @@ insert_tree(struct net *net,
count = 1;
rbconn->list.count = count;
- rb_link_node(&rbconn->node, parent, rbnode);
+ rb_link_node_rcu(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
out_unlock:
- spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ spin_unlock_bh(&nf_conncount_locks[hash]);
return count;
}
@@ -441,7 +387,6 @@ count_tree(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct rb_root *root;
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
@@ -454,7 +399,6 @@ count_tree(struct net *net,
parent = rcu_dereference_raw(root->rb_node);
while (parent) {
int diff;
- bool addit;
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
@@ -464,31 +408,36 @@ count_tree(struct net *net,
} else if (diff > 0) {
parent = rcu_dereference_raw(parent->rb_right);
} else {
- /* same source network -> be counted! */
- nf_conncount_lookup(net, &rbconn->list, tuple, zone,
- &addit);
+ int ret;
- if (!addit)
+ if (!tuple) {
+ nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count;
+ }
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
- return 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
- return rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
+ spin_lock_bh(&rbconn->list.list_lock);
+ /* Node might be about to be free'd.
+ * We need to defer to insert_tree() in this case.
+ */
+ if (rbconn->list.count == 0) {
+ spin_unlock_bh(&rbconn->list.list_lock);
break;
}
+
+ /* same source network -> be counted! */
+ ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+ spin_unlock_bh(&rbconn->list.list_lock);
+ if (ret)
+ return 0; /* hotdrop */
+ else
+ return rbconn->list.count;
}
}
if (!tuple)
return 0;
- return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+ return insert_tree(net, data, root, hash, key, tuple, zone);
}
static void tree_gc_worker(struct work_struct *work)
@@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work)
struct rb_node *node;
unsigned int tree, next_tree, gc_count = 0;
- tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+ tree = data->gc_tree % CONNCOUNT_SLOTS;
root = &data->root[tree];
+ local_bh_disable();
rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node);
if (nf_conncount_gc_list(data->net, &rbconn->list))
- gc_nodes[gc_count++] = rbconn;
+ gc_count++;
}
rcu_read_unlock();
+ local_bh_enable();
+
+ cond_resched();
spin_lock_bh(&nf_conncount_locks[tree]);
+ if (gc_count < ARRAY_SIZE(gc_nodes))
+ goto next; /* do not bother */
- if (gc_count) {
- tree_nodes_free(root, gc_nodes, gc_count);
+ gc_count = 0;
+ node = rb_first(root);
+ while (node != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ node = rb_next(node);
+
+ if (rbconn->list.count > 0)
+ continue;
+
+ gc_nodes[gc_count++] = rbconn;
+ if (gc_count >= ARRAY_SIZE(gc_nodes)) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+ gc_count = 0;
+ }
}
+ tree_nodes_free(root, gc_nodes, gc_count);
+next:
clear_bit(tree, data->pending_trees);
next_tree = (tree + 1) % CONNCOUNT_SLOTS;
- next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
+ next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
if (next_tree < CONNCOUNT_SLOTS) {
data->gc_tree = next_tree;
@@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void)
{
int i;
- BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
- BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
-
- for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
+ for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 277d02a8cac8..27eff89fad01 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
@@ -424,6 +425,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+/* Generate a almost-unique pseudo-id for a given conntrack.
+ *
+ * intentionally doesn't re-use any of the seeds used for hash
+ * table location, we assume id gets exposed to userspace.
+ *
+ * Following nf_conn items do not change throughout lifetime
+ * of the nf_conn after it has been committed to main hash table:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->ext address
+ * 3. nf_conn->master address (normally NULL)
+ * 4. tuple
+ * 5. the associated net namespace
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+ static __read_mostly siphash_key_t ct_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+ a = (unsigned long)ct;
+ b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+ c = (unsigned long)ct->ext;
+ d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
+ &ct_id_seed);
+#ifdef CONFIG_64BIT
+ return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+ return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
static void
clean_from_lists(struct nf_conn *ct)
{
@@ -901,10 +936,18 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* REJECT will give spurious warnings here.
*/
- /* No external references means no one else could have
- * confirmed us.
+ /* Another skb with the same unconfirmed conntrack may
+ * win the race. This may happen for bridge(br_flood)
+ * or broadcast/multicast packets do skb_clone with
+ * unconfirmed conntrack.
*/
- WARN_ON(nf_ct_is_confirmed(ct));
+ if (unlikely(nf_ct_is_confirmed(ct))) {
+ WARN_ON_ONCE(1);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return NF_DROP;
+ }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1007,6 +1050,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
}
if (nf_ct_key_equal(h, tuple, zone, net)) {
+ /* Tuple is taken already, so caller will need to find
+ * a new source port to use.
+ *
+ * Only exception:
+ * If the *original tuples* are identical, then both
+ * conntracks refer to the same flow.
+ * This is a rare situation, it can occur e.g. when
+ * more than one UDP packet is sent from same socket
+ * in different threads.
+ *
+ * Let nf_ct_resolve_clash() deal with this later.
+ */
+ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ continue;
+
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 036207ecaf16..47e5a076522d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/siphash.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -487,7 +488,9 @@ nla_put_failure:
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+ __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+ if (nla_put_be32(skb, CTA_ID, id))
goto nla_put_failure;
return 0;
@@ -1275,8 +1278,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
if (cda[CTA_ID]) {
- u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
- if (id != (u32)(unsigned long)ct) {
+ __be32 id = nla_get_be32(cda[CTA_ID]);
+
+ if (id != (__force __be32)nf_ct_get_id(ct)) {
nf_ct_put(ct);
return -ENOENT;
}
@@ -2675,6 +2679,25 @@ nla_put_failure:
static const union nf_inet_addr any_addr;
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+ static __read_mostly siphash_key_t exp_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+ a = (unsigned long)exp;
+ b = (unsigned long)exp->helper;
+ c = (unsigned long)exp->master;
+ d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+ return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+ return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
+
static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
@@ -2722,7 +2745,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
}
#endif
if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
- nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+ nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
@@ -3027,7 +3050,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
- if (ntohl(id) != (u32)(unsigned long)exp) {
+
+ if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
return -ENOENT;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 51c5d7eec0a3..e903ef9b96cf 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -86,7 +86,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
struct va_format vaf;
va_list args;
- if (net->ct.sysctl_log_invalid != protonum ||
+ if (net->ct.sysctl_log_invalid != protonum &&
net->ct.sysctl_log_invalid != IPPROTO_RAW)
return;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 650eb4fba2c5..841c472aae1c 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
-enum grep_conntrack {
- GRE_CT_UNREPLIED,
- GRE_CT_REPLIED,
- GRE_CT_MAX
-};
-
static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
- struct nf_proto_net nf;
- rwlock_t keymap_lock;
- struct list_head keymap_list;
- unsigned int gre_timeouts[GRE_CT_MAX];
-};
static inline struct netns_proto_gre *gre_pernet(struct net *net)
{
@@ -408,6 +396,8 @@ static int __init nf_ct_proto_gre_init(void)
{
int ret;
+ BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 247b89784a6f..842f3f86fb2e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -769,6 +769,12 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
+static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
+{
+ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
+ test_bit(IPS_ASSURED_BIT, &ct->status);
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
@@ -963,16 +969,38 @@ static int tcp_packet(struct nf_conn *ct,
new_state = TCP_CONNTRACK_ESTABLISHED;
break;
case TCP_CONNTRACK_CLOSE:
- if (index == TCP_RST_SET
- && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
- && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
- /* Invalid RST */
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
- return -NF_ACCEPT;
+ if (index != TCP_RST_SET)
+ break;
+
+ if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+ u32 seq = ntohl(th->seq);
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+ /* Invalid RST */
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+ return -NF_ACCEPT;
+ }
+
+ if (!nf_conntrack_tcp_established(ct) ||
+ seq == ct->proto.tcp.seen[!dir].td_maxack)
+ break;
+
+ /* Check if rst is part of train, such as
+ * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
+ * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
+ */
+ if (ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == dir &&
+ seq == ct->proto.tcp.last_end)
+ break;
+
+ /* ... RST sequence number doesn't match exactly, keep
+ * established state to allow a possible challenge ACK.
+ */
+ new_state = old_state;
}
- if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+ if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
&& ct->proto.tcp.last_index == TCP_SYN_SET)
|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
&& ct->proto.tcp.last_index == TCP_ACK_SET))
@@ -988,7 +1016,7 @@ static int tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
- /* Just fall through */
+ break;
default:
/* Keep compilers happy. */
break;
@@ -1023,6 +1051,8 @@ static int tcp_packet(struct nf_conn *ct,
if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
timeout = timeouts[TCP_CONNTRACK_RETRANS];
+ else if (unlikely(index == TCP_RST_SET))
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index a975efd6b8c3..9da303461069 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -115,12 +115,12 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb,
/* TCP SACK sequence number adjustment */
static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
unsigned int protoff,
- struct tcphdr *tcph,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- unsigned int dir, optoff, optend;
+ struct tcphdr *tcph = (void *)skb->data + protoff;
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ unsigned int dir, optoff, optend;
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + tcph->doff * 4;
@@ -128,6 +128,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
if (!skb_make_writable(skb, optend))
return 0;
+ tcph = (void *)skb->data + protoff;
dir = CTINFO2DIR(ctinfo);
while (optoff < optend) {
@@ -207,7 +208,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
ntohl(newack));
tcph->ack_seq = newack;
- res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+ res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo);
out:
spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index d8125616edc7..e1537ace2b90 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -28,6 +28,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+ struct dst_entry *other_dst = route->tuple[!dir].dst;
struct dst_entry *dst = route->tuple[dir].dst;
ft->dir = dir;
@@ -50,8 +51,8 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
- ft->iifidx = route->tuple[dir].ifindex;
- ft->oifidx = route->tuple[!dir].ifindex;
+ ft->iifidx = other_dst->dev->ifindex;
+ ft->oifidx = dst->dev->ifindex;
ft->dst_cache = dst;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index e2b196054dfc..2268b10a9dcf 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -117,7 +117,8 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
dst = skb_dst(skb);
if (dst->xfrm)
dst = ((struct xfrm_dst *)dst)->route;
- dst_hold(dst);
+ if (!dst_hold_safe(dst))
+ return -EHOSTUNREACH;
if (sk && !net_eq(net, sock_net(sk)))
sk = NULL;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fe0558b15fd3..ebfcfe1dcbdb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -112,6 +112,23 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_set_is_anonymous(set))
+ return;
+
+ list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ nft_trans_set(trans) == set) {
+ set->bound = true;
+ break;
+ }
+ }
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
@@ -222,14 +239,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+ struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
if (expr->ops->deactivate)
- expr->ops->deactivate(ctx, expr);
+ expr->ops->deactivate(ctx, expr, phase);
expr = nft_expr_next(expr);
}
@@ -280,7 +298,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
nft_trans_destroy(trans);
return err;
}
- nft_rule_expr_deactivate(ctx, rule);
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
return 0;
}
@@ -291,6 +309,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
int err;
list_for_each_entry(rule, &ctx->chain->rules, list) {
+ if (!nft_is_active_next(ctx->net, rule))
+ continue;
+
err = nft_delrule(ctx, rule);
if (err < 0)
return err;
@@ -298,7 +319,7 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
struct nft_trans *trans;
@@ -318,7 +339,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
return 0;
}
-static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -1199,7 +1220,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
- if (basechain->stats && nft_dump_stats(skb, basechain->stats))
+ if (rcu_access_pointer(basechain->stats) &&
+ nft_dump_stats(skb, rcu_dereference(basechain->stats)))
goto nla_put_failure;
}
@@ -1375,7 +1397,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct nft_base_chain *chain,
+static void nft_chain_stats_replace(struct net *net,
+ struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
struct nft_stats __percpu *oldstats;
@@ -1383,8 +1406,9 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
if (newstats == NULL)
return;
- if (chain->stats) {
- oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
+ if (rcu_access_pointer(chain->stats)) {
+ oldstats = rcu_dereference_protected(chain->stats,
+ lockdep_commit_lock_is_held(net));
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
@@ -1421,9 +1445,10 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
struct nft_base_chain *basechain = nft_base_chain(chain);
module_put(basechain->type->owner);
- free_percpu(basechain->stats);
- if (basechain->stats)
+ if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
+ free_percpu(rcu_dereference_raw(basechain->stats));
+ }
kfree(chain->name);
kfree(basechain);
} else {
@@ -1471,7 +1496,7 @@ static int nft_chain_parse_hook(struct net *net,
if (IS_ERR(type))
return PTR_ERR(type);
}
- if (!(type->hook_mask & (1 << hook->num)))
+ if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
@@ -1572,7 +1597,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
kfree(basechain);
return PTR_ERR(stats);
}
- basechain->stats = stats;
+ rcu_assign_pointer(basechain->stats, stats);
static_branch_inc(&nft_counters_enabled);
}
@@ -2088,9 +2113,11 @@ err1:
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{
+ const struct nft_expr_type *type = expr->ops->type;
+
if (expr->ops->destroy)
expr->ops->destroy(ctx, expr);
- module_put(expr->ops->type->owner);
+ module_put(type->owner);
}
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
@@ -2098,6 +2125,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
{
struct nft_expr_info info;
struct nft_expr *expr;
+ struct module *owner;
int err;
err = nf_tables_expr_parse(ctx, nla, &info);
@@ -2117,7 +2145,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err3:
kfree(expr);
err2:
- module_put(info.ops->type->owner);
+ owner = info.ops->type->owner;
+ if (info.ops->type->release_ops)
+ info.ops->type->release_ops(info.ops);
+
+ module_put(owner);
err1:
return ERR_PTR(err);
}
@@ -2451,7 +2483,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static void nf_tables_rule_release(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
- nft_rule_expr_deactivate(ctx, rule);
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
}
@@ -2687,8 +2719,11 @@ err2:
nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
- if (info[i].ops != NULL)
+ if (info[i].ops) {
module_put(info[i].ops->type->owner);
+ if (info[i].ops->type->release_ops)
+ info[i].ops->type->release_ops(info[i].ops);
+ }
}
kvfree(info);
return err;
@@ -3555,19 +3590,15 @@ err1:
static void nft_set_destroy(struct nft_set *set)
{
+ if (WARN_ON(set->use > 0))
+ return;
+
set->ops->destroy(set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
kvfree(set);
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
-{
- list_del_rcu(&set->list);
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
- nft_set_destroy(set);
-}
-
static int nf_tables_delset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -3602,7 +3633,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
- if (!list_empty(&set->bindings) ||
+ if (set->use ||
(nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
@@ -3632,6 +3663,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
+ if (set->use == UINT_MAX)
+ return -EOVERFLOW;
+
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -3658,21 +3692,53 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
bind:
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
+ nft_set_trans_bind(ctx, set);
+ set->use++;
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding)
+ struct nft_set_binding *binding, bool event)
{
list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
- nft_is_active(ctx->net, set))
- nf_tables_set_destroy(ctx, set);
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
+ list_del_rcu(&set->list);
+ if (event)
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+ GFP_KERNEL);
+ }
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE:
+ set->use--;
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ set->use--;
+ /* fall through */
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+ phase == NFT_TRANS_COMMIT);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
+ nft_set_destroy(set);
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+
const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY] = {
.align = __alignof__(u32),
@@ -4435,6 +4501,8 @@ err6:
err5:
kfree(trans);
err4:
+ if (obj)
+ obj->use--;
kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
@@ -6145,7 +6213,8 @@ static void nft_chain_commit_update(struct nft_trans *trans)
return;
basechain = nft_base_chain(trans->ctx.chain);
- nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+ nft_chain_stats_replace(trans->ctx.net, basechain,
+ nft_trans_chain_stats(trans));
switch (nft_trans_chain_policy(trans)) {
case NF_DROP:
@@ -6419,6 +6488,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_DELRULE);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_COMMIT);
break;
case NFT_MSG_NEWSET:
nft_clear(net, nft_trans_set(trans));
@@ -6567,7 +6639,9 @@ static int __nf_tables_abort(struct net *net)
case NFT_MSG_NEWRULE:
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
- nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_ABORT);
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
@@ -6577,6 +6651,10 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
+ if (nft_trans_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
@@ -6585,8 +6663,11 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
te = (struct nft_trans_elem *)trans->data;
-
te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
@@ -7193,9 +7274,6 @@ static void __nft_release_tables(struct net *net)
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
- list_for_each_entry(flowtable, &table->flowtables, list)
- nf_unregister_net_hooks(net, flowtable->ops,
- flowtable->ops_len);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ffd5c0f9412b..a3850414dba2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -98,21 +98,23 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
const struct nft_pktinfo *pkt)
{
struct nft_base_chain *base_chain;
+ struct nft_stats __percpu *pstats;
struct nft_stats *stats;
base_chain = nft_base_chain(chain);
- if (!base_chain->stats)
- return;
- local_bh_disable();
- stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
- if (stats) {
+ rcu_read_lock();
+ pstats = READ_ONCE(base_chain->stats);
+ if (pstats) {
+ local_bh_disable();
+ stats = this_cpu_ptr(pstats);
u64_stats_update_begin(&stats->syncp);
stats->pkts++;
stats->bytes += pkt->skb->len;
u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
}
- local_bh_enable();
+ rcu_read_unlock();
}
struct nft_jumpstack {
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a30f8ba4b89a..70a7382b9787 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -392,7 +392,8 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- const struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto,
+ const unsigned int *timeouts)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -421,7 +422,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (!nest_parms)
goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
if (ret < 0)
goto nla_put_failure;
@@ -444,6 +445,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts = NULL;
struct sk_buff *skb2;
int ret, err;
__u16 l3num;
@@ -456,12 +458,55 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
- /* This protocol is not supported, skip. */
- if (l4proto->l4proto != l4num) {
- err = -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ if (l4proto->l4proto != l4num)
goto err;
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ timeouts = &net->ct.nf_ct_proto.icmp.timeout;
+ break;
+ case IPPROTO_TCP:
+ timeouts = net->ct.nf_ct_proto.tcp.timeouts;
+ break;
+ case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDPLITE:
+ timeouts = net->ct.nf_ct_proto.udp.timeouts;
+ break;
+ case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ timeouts = net->ct.nf_ct_proto.dccp.dccp_timeout;
+#endif
+ break;
+ case IPPROTO_ICMPV6:
+ timeouts = &net->ct.nf_ct_proto.icmpv6.timeout;
+ break;
+ case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ timeouts = net->ct.nf_ct_proto.sctp.timeouts;
+#endif
+ break;
+ case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ if (l4proto->net_id) {
+ struct netns_proto_gre *net_gre;
+
+ net_gre = net_generic(net, *l4proto->net_id);
+ timeouts = net_gre->gre_timeouts;
+ }
+#endif
+ break;
+ case 255:
+ timeouts = &net->ct.nf_ct_proto.generic.timeout;
+ break;
+ default:
+ WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
+ break;
}
+ if (!timeouts)
+ goto err;
+
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb2 == NULL) {
err = -ENOMEM;
@@ -472,7 +517,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
- l4proto);
+ l4proto, timeouts);
if (ret <= 0) {
kfree_skb(skb2);
err = -ENOMEM;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 00db27dfd2ff..b0bc130947c9 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -71,6 +71,7 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
int ttl_check,
struct nf_osf_hdr_ctx *ctx)
{
+ const __u8 *optpinit = ctx->optp;
unsigned int check_WSS = 0;
int fmatch = FMATCH_WRONG;
int foptsize, optnum;
@@ -160,6 +161,9 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
}
}
+ if (fmatch != FMATCH_OK)
+ ctx->optp = optpinit;
+
return fmatch == FMATCH_OK;
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 29d6fc73caf9..1245e02239d9 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -23,19 +23,6 @@
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
-struct nft_xt {
- struct list_head head;
- struct nft_expr_ops ops;
- unsigned int refcnt;
-
- /* Unlike other expressions, ops doesn't have static storage duration.
- * nft core assumes they do. We use kfree_rcu so that nft core can
- * can check expr->ops->size even after nft_compat->destroy() frees
- * the nft_xt struct that holds the ops structure.
- */
- struct rcu_head rcu_head;
-};
-
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@@ -43,17 +30,6 @@ struct nft_xt_match_priv {
void *info;
};
-static bool nft_xt_put(struct nft_xt *xt)
-{
- if (--xt->refcnt == 0) {
- list_del(&xt->head);
- kfree_rcu(xt, rcu_head);
- return true;
- }
-
- return false;
-}
-
static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
const char *tablename)
{
@@ -248,7 +224,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_target *target = expr->ops->data;
struct xt_tgchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
- struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -272,8 +247,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (!target->target)
return -EINVAL;
- nft_xt = container_of(expr->ops, struct nft_xt, ops);
- nft_xt->refcnt++;
return 0;
}
@@ -282,6 +255,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
void *info = nft_expr_priv(expr);
+ struct module *me = target->me;
struct xt_tgdtor_param par;
par.net = ctx->net;
@@ -291,8 +265,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy(&par);
- if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(target->me);
+ module_put(me);
+ kfree(expr->ops);
}
static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -446,7 +420,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_match *match = expr->ops->data;
struct xt_mtchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
- struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -462,13 +435,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
- ret = xt_check_match(&par, size, proto, inv);
- if (ret < 0)
- return ret;
-
- nft_xt = container_of(expr->ops, struct nft_xt, ops);
- nft_xt->refcnt++;
- return 0;
+ return xt_check_match(&par, size, proto, inv);
}
static int
@@ -511,8 +478,8 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (par.match->destroy != NULL)
par.match->destroy(&par);
- if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(me);
+ module_put(me);
+ kfree(expr->ops);
}
static void
@@ -714,22 +681,13 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
.cb = nfnl_nft_compat_cb,
};
-static LIST_HEAD(nft_match_list);
-
static struct nft_expr_type nft_match_type;
-static bool nft_match_cmp(const struct xt_match *match,
- const char *name, u32 rev, u32 family)
-{
- return strcmp(match->name, name) == 0 && match->revision == rev &&
- (match->family == NFPROTO_UNSPEC || match->family == family);
-}
-
static const struct nft_expr_ops *
nft_match_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
- struct nft_xt *nft_match;
+ struct nft_expr_ops *ops;
struct xt_match *match;
unsigned int matchsize;
char *mt_name;
@@ -745,14 +703,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
family = ctx->family;
- /* Re-use the existing match if it's already loaded. */
- list_for_each_entry(nft_match, &nft_match_list, head) {
- struct xt_match *match = nft_match->ops.data;
-
- if (nft_match_cmp(match, mt_name, rev, family))
- return &nft_match->ops;
- }
-
match = xt_request_find_match(family, mt_name, rev);
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
@@ -762,66 +712,62 @@ nft_match_select_ops(const struct nft_ctx *ctx,
goto err;
}
- /* This is the first time we use this match, allocate operations */
- nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_match == NULL) {
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
err = -ENOMEM;
goto err;
}
- nft_match->refcnt = 0;
- nft_match->ops.type = &nft_match_type;
- nft_match->ops.eval = nft_match_eval;
- nft_match->ops.init = nft_match_init;
- nft_match->ops.destroy = nft_match_destroy;
- nft_match->ops.dump = nft_match_dump;
- nft_match->ops.validate = nft_match_validate;
- nft_match->ops.data = match;
+ ops->type = &nft_match_type;
+ ops->eval = nft_match_eval;
+ ops->init = nft_match_init;
+ ops->destroy = nft_match_destroy;
+ ops->dump = nft_match_dump;
+ ops->validate = nft_match_validate;
+ ops->data = match;
matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
if (matchsize > NFT_MATCH_LARGE_THRESH) {
matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
- nft_match->ops.eval = nft_match_large_eval;
- nft_match->ops.init = nft_match_large_init;
- nft_match->ops.destroy = nft_match_large_destroy;
- nft_match->ops.dump = nft_match_large_dump;
+ ops->eval = nft_match_large_eval;
+ ops->init = nft_match_large_init;
+ ops->destroy = nft_match_large_destroy;
+ ops->dump = nft_match_large_dump;
}
- nft_match->ops.size = matchsize;
-
- list_add(&nft_match->head, &nft_match_list);
+ ops->size = matchsize;
- return &nft_match->ops;
+ return ops;
err:
module_put(match->me);
return ERR_PTR(err);
}
+static void nft_match_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_match *match = ops->data;
+
+ module_put(match->me);
+ kfree(ops);
+}
+
static struct nft_expr_type nft_match_type __read_mostly = {
.name = "match",
.select_ops = nft_match_select_ops,
+ .release_ops = nft_match_release_ops,
.policy = nft_match_policy,
.maxattr = NFTA_MATCH_MAX,
.owner = THIS_MODULE,
};
-static LIST_HEAD(nft_target_list);
-
static struct nft_expr_type nft_target_type;
-static bool nft_target_cmp(const struct xt_target *tg,
- const char *name, u32 rev, u32 family)
-{
- return strcmp(tg->name, name) == 0 && tg->revision == rev &&
- (tg->family == NFPROTO_UNSPEC || tg->family == family);
-}
-
static const struct nft_expr_ops *
nft_target_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
- struct nft_xt *nft_target;
+ struct nft_expr_ops *ops;
struct xt_target *target;
char *tg_name;
u32 rev, family;
@@ -841,17 +787,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
strcmp(tg_name, "standard") == 0)
return ERR_PTR(-EINVAL);
- /* Re-use the existing target if it's already loaded. */
- list_for_each_entry(nft_target, &nft_target_list, head) {
- struct xt_target *target = nft_target->ops.data;
-
- if (!target->target)
- continue;
-
- if (nft_target_cmp(target, tg_name, rev, family))
- return &nft_target->ops;
- }
-
target = xt_request_find_target(family, tg_name, rev);
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
@@ -866,38 +801,43 @@ nft_target_select_ops(const struct nft_ctx *ctx,
goto err;
}
- /* This is the first time we use this target, allocate operations */
- nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_target == NULL) {
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
err = -ENOMEM;
goto err;
}
- nft_target->refcnt = 0;
- nft_target->ops.type = &nft_target_type;
- nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
- nft_target->ops.init = nft_target_init;
- nft_target->ops.destroy = nft_target_destroy;
- nft_target->ops.dump = nft_target_dump;
- nft_target->ops.validate = nft_target_validate;
- nft_target->ops.data = target;
+ ops->type = &nft_target_type;
+ ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
+ ops->init = nft_target_init;
+ ops->destroy = nft_target_destroy;
+ ops->dump = nft_target_dump;
+ ops->validate = nft_target_validate;
+ ops->data = target;
if (family == NFPROTO_BRIDGE)
- nft_target->ops.eval = nft_target_eval_bridge;
+ ops->eval = nft_target_eval_bridge;
else
- nft_target->ops.eval = nft_target_eval_xt;
-
- list_add(&nft_target->head, &nft_target_list);
+ ops->eval = nft_target_eval_xt;
- return &nft_target->ops;
+ return ops;
err:
module_put(target->me);
return ERR_PTR(err);
}
+static void nft_target_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_target *target = ops->data;
+
+ module_put(target->me);
+ kfree(ops);
+}
+
static struct nft_expr_type nft_target_type __read_mostly = {
.name = "target",
.select_ops = nft_target_select_ops,
+ .release_ops = nft_target_release_ops,
.policy = nft_target_policy,
.maxattr = NFTA_TARGET_MAX,
.owner = THIS_MODULE,
@@ -922,7 +862,6 @@ static int __init nft_compat_module_init(void)
}
return ret;
-
err_target:
nft_unregister_expr(&nft_target_type);
err_match:
@@ -932,32 +871,6 @@ err_match:
static void __exit nft_compat_module_exit(void)
{
- struct nft_xt *xt, *next;
-
- /* list should be empty here, it can be non-empty only in case there
- * was an error that caused nft_xt expr to not be initialized fully
- * and noone else requested the same expression later.
- *
- * In this case, the lists contain 0-refcount entries that still
- * hold module reference.
- */
- list_for_each_entry_safe(xt, next, &nft_target_list, head) {
- struct xt_target *target = xt->ops.data;
-
- if (WARN_ON_ONCE(xt->refcnt))
- continue;
- module_put(target->me);
- kfree(xt);
- }
-
- list_for_each_entry_safe(xt, next, &nft_match_list, head) {
- struct xt_match *match = xt->ops.data;
-
- if (WARN_ON_ONCE(xt->refcnt))
- continue;
- module_put(match->me);
- kfree(xt);
- }
nfnetlink_subsys_unregister(&nfnl_compat_subsys);
nft_unregister_expr(&nft_target_type);
nft_unregister_expr(&nft_match_type);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96ba4a12..af1497ab9464 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int count;
- bool addit;
tuple_ptr = &tuple;
@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
- nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
- &addit);
- count = priv->list.count;
-
- if (!addit)
- goto out;
-
- if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
+ if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP;
return;
}
- count++;
-out:
+
+ count = priv->list.count;
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 6e91a37d57f2..eb7f9a5f2aeb 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -235,14 +235,32 @@ err1:
return err;
}
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ priv->set->use++;
+}
+
static void nft_dynset_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
if (priv->expr != NULL)
nft_expr_destroy(ctx, priv->expr);
+
+ nf_tables_destroy_set(ctx, priv->set);
}
static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -279,6 +297,8 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
+ .activate = nft_dynset_activate,
+ .deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 5fd4c57c79cc..436cc14cfc59 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -12,6 +12,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack_helper.h>
struct nft_flow_offload {
struct nft_flowtable *flowtable;
@@ -29,10 +30,12 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
memset(&fl, 0, sizeof(fl));
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
break;
case NFPROTO_IPV6:
- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
break;
}
@@ -41,9 +44,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
return -ENOENT;
route->tuple[dir].dst = this_dst;
- route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
route->tuple[!dir].dst = other_dst;
- route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
return 0;
}
@@ -66,6 +67,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
struct nf_flowtable *flowtable = &priv->flowtable->data;
+ const struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
struct nf_flow_route route;
struct flow_offload *flow;
@@ -88,7 +90,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
goto out;
}
- if (test_bit(IPS_HELPER_BIT, &ct->status))
+ help = nfct_help(ct);
+ if (help)
goto out;
if (ctinfo == IP_CT_NEW ||
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 0777a93211e2..3f6d1d2a6281 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
}
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index ad13e8643599..161c3451a747 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -121,12 +121,29 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ priv->set->use++;
+}
+
static void nft_lookup_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ nf_tables_destroy_set(ctx, priv->set);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -209,6 +226,8 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
+ .activate = nft_lookup_activate,
+ .deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index cdf348f751ec..bf92a40dd1b2 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -64,21 +64,34 @@ nla_put_failure:
return -1;
}
-static void nft_objref_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_object *obj = nft_objref_priv(expr);
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
obj->use--;
}
+static void nft_objref_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->use++;
+}
+
static struct nft_expr_type nft_objref_type;
static const struct nft_expr_ops nft_objref_ops = {
.type = &nft_objref_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
.eval = nft_objref_eval,
.init = nft_objref_init,
- .destroy = nft_objref_destroy,
+ .activate = nft_objref_activate,
+ .deactivate = nft_objref_deactivate,
.dump = nft_objref_dump,
};
@@ -155,12 +168,29 @@ nla_put_failure:
return -1;
}
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ priv->set->use++;
+}
+
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ nf_tables_destroy_set(ctx, priv->set);
}
static struct nft_expr_type nft_objref_type;
@@ -169,6 +199,8 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
+ .activate = nft_objref_map_activate,
+ .deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 0e5ec126f6ad..b3e75f9cb686 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -302,10 +302,6 @@ static void *nft_rbtree_deactivate(const struct net *net,
else if (d > 0)
parent = parent->rb_right;
else {
- if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
- continue;
- }
if (nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(this)) {
parent = parent->rb_left;
@@ -314,6 +310,9 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
+ } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
}
nft_rbtree_flush(net, set, rbe);
return rbe;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index aecadd471e1d..13e1ac333fa4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1899,7 +1899,7 @@ static int __init xt_init(void)
seqcount_init(&per_cpu(xt_recseq, i));
}
- xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+ xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
if (!xt)
return -ENOMEM;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 0d0d68c989df..1dae02a97ee3 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -14,6 +14,8 @@
#include <linux/skbuff.h>
#include <linux/route.h>
#include <linux/netfilter/x_tables.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/route.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
@@ -25,8 +27,15 @@ struct xt_tee_priv {
int oif;
};
+static unsigned int tee_net_id __read_mostly;
static const union nf_inet_addr tee_zero_address;
+struct tee_net {
+ struct list_head priv_list;
+ /* lock protects the priv_list */
+ struct mutex lock;
+};
+
static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -51,17 +60,16 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
}
#endif
-static DEFINE_MUTEX(priv_list_mutex);
-static LIST_HEAD(priv_list);
-
static int tee_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct tee_net *tn = net_generic(net, tee_net_id);
struct xt_tee_priv *priv;
- mutex_lock(&priv_list_mutex);
- list_for_each_entry(priv, &priv_list, list) {
+ mutex_lock(&tn->lock);
+ list_for_each_entry(priv, &tn->priv_list, list) {
switch (event) {
case NETDEV_REGISTER:
if (!strcmp(dev->name, priv->tginfo->oif))
@@ -79,13 +87,14 @@ static int tee_netdev_event(struct notifier_block *this, unsigned long event,
break;
}
}
- mutex_unlock(&priv_list_mutex);
+ mutex_unlock(&tn->lock);
return NOTIFY_DONE;
}
static int tee_tg_check(const struct xt_tgchk_param *par)
{
+ struct tee_net *tn = net_generic(par->net, tee_net_id);
struct xt_tee_tginfo *info = par->targinfo;
struct xt_tee_priv *priv;
@@ -95,6 +104,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
if (info->oif[0]) {
+ struct net_device *dev;
+
if (info->oif[sizeof(info->oif)-1] != '\0')
return -EINVAL;
@@ -106,9 +117,14 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
priv->oif = -1;
info->priv = priv;
- mutex_lock(&priv_list_mutex);
- list_add(&priv->list, &priv_list);
- mutex_unlock(&priv_list_mutex);
+ dev = dev_get_by_name(par->net, info->oif);
+ if (dev) {
+ priv->oif = dev->ifindex;
+ dev_put(dev);
+ }
+ mutex_lock(&tn->lock);
+ list_add(&priv->list, &tn->priv_list);
+ mutex_unlock(&tn->lock);
} else
info->priv = NULL;
@@ -118,12 +134,13 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
static void tee_tg_destroy(const struct xt_tgdtor_param *par)
{
+ struct tee_net *tn = net_generic(par->net, tee_net_id);
struct xt_tee_tginfo *info = par->targinfo;
if (info->priv) {
- mutex_lock(&priv_list_mutex);
+ mutex_lock(&tn->lock);
list_del(&info->priv->list);
- mutex_unlock(&priv_list_mutex);
+ mutex_unlock(&tn->lock);
kfree(info->priv);
}
static_key_slow_dec(&xt_tee_enabled);
@@ -156,6 +173,21 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
#endif
};
+static int __net_init tee_net_init(struct net *net)
+{
+ struct tee_net *tn = net_generic(net, tee_net_id);
+
+ INIT_LIST_HEAD(&tn->priv_list);
+ mutex_init(&tn->lock);
+ return 0;
+}
+
+static struct pernet_operations tee_net_ops = {
+ .init = tee_net_init,
+ .id = &tee_net_id,
+ .size = sizeof(struct tee_net),
+};
+
static struct notifier_block tee_netdev_notifier = {
.notifier_call = tee_netdev_event,
};
@@ -164,22 +196,32 @@ static int __init tee_tg_init(void)
{
int ret;
- ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
- if (ret)
+ ret = register_pernet_subsys(&tee_net_ops);
+ if (ret < 0)
return ret;
+
+ ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ if (ret < 0)
+ goto cleanup_subsys;
+
ret = register_netdevice_notifier(&tee_netdev_notifier);
- if (ret) {
- xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
- return ret;
- }
+ if (ret < 0)
+ goto unregister_targets;
return 0;
+
+unregister_targets:
+ xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+cleanup_subsys:
+ unregister_pernet_subsys(&tee_net_ops);
+ return ret;
}
static void __exit tee_tg_exit(void)
{
unregister_netdevice_notifier(&tee_netdev_notifier);
xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ unregister_pernet_subsys(&tee_net_ops);
}
module_init(tee_tg_init);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 5d92e1781980..5cb1ecb29ea4 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -68,6 +68,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
return 0;
}
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v2 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+ pr_info("xt_cgroup: no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info_ratelimited("path and classid specified\n");
+ return -EINVAL;
+ }
+
+ info->priv = NULL;
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info_ratelimited("invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
static bool
cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -99,6 +131,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
info->invert_classid;
}
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v2 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+ struct sock *sk = skb->sk;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
{
struct xt_cgroup_info_v1 *info = par->matchinfo;
@@ -107,6 +157,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
cgroup_put(info->priv);
}
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v2 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
+}
+
static struct xt_match cgroup_mt_reg[] __read_mostly = {
{
.name = "cgroup",
@@ -134,6 +192,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = {
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
+ {
+ .name = "cgroup",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v2,
+ .match = cgroup_mt_v2,
+ .matchsize = sizeof(struct xt_cgroup_info_v2),
+ .usersize = offsetof(struct xt_cgroup_info_v2, priv),
+ .destroy = cgroup_mt_destroy_v2,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
};
static int __init cgroup_mt_init(void)
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 9d6d67b953ac..05f00fb20b04 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -96,8 +96,7 @@ match_outdev:
static int physdev_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_physdev_info *info = par->matchinfo;
-
- br_netfilter_enable();
+ static bool brnf_probed __read_mostly;
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -111,6 +110,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
+
+ if (!brnf_probed) {
+ brnf_probed = true;
+ request_module("br_netfilter");
+ }
+
return 0;
}
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index ea7c67050792..ee3e5b6471a6 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -903,7 +903,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
(state == 0 && (byte & bitmask) == 0))
return bit_spot;
- bit_spot++;
+ if (++bit_spot >= bitmap_len)
+ return -1;
bitmask >>= 1;
if (bitmask == 0) {
byte = bitmap[++byte_offset];
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 25eeb6d2a75a..f0ec068e1d02 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -366,7 +366,7 @@ int genl_register_family(struct genl_family *family)
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_locked;
+ goto errout_free;
}
err = genl_validate_assign_mc_groups(family);
@@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
+errout_free:
kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 03f37c4e64fe..71ffd1a6dc7c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr)
sk_for_each(s, &nr_list)
if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
s->sk_state == TCP_LISTEN) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
s = NULL;
@@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id)
struct nr_sock *nr = nr_sk(s);
if (nr->my_index == index && nr->my_id == id) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
}
@@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id,
if (nr->your_index == index && nr->your_id == id &&
!ax25cmp(&nr->dest_addr, dest)) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
}
@@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void)
if (i != 0 && j != 0) {
if ((sk=nr_find_socket(i, j)) == NULL)
break;
- bh_unlock_sock(sk);
+ sock_put(sk);
}
id++;
@@ -920,6 +920,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
}
if (sk != NULL) {
+ bh_lock_sock(sk);
skb_reset_transport_header(skb);
if (frametype == NR_CONNACK && skb->len == 22)
@@ -929,6 +930,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
ret = nr_process_rx_frame(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
return ret;
}
@@ -960,10 +962,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
(make = nr_make_new(sk)) == NULL) {
nr_transmit_refusal(skb, 0);
if (sk)
- bh_unlock_sock(sk);
+ sock_put(sk);
return 0;
}
+ bh_lock_sock(sk);
+
window = skb->data[20];
skb->sk = make;
@@ -1016,6 +1020,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
sk->sk_data_ready(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
nr_insert_socket(make);
@@ -1387,18 +1392,22 @@ static int __init nr_proto_init(void)
int i;
int rc = proto_register(&nr_proto, 0);
- if (rc != 0)
- goto out;
+ if (rc)
+ return rc;
if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
- return -1;
+ pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
+ __func__);
+ rc = -EINVAL;
+ goto unregister_proto;
}
dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
- if (dev_nr == NULL) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
- return -1;
+ if (!dev_nr) {
+ pr_err("NET/ROM: %s - unable to allocate device array\n",
+ __func__);
+ rc = -ENOMEM;
+ goto unregister_proto;
}
for (i = 0; i < nr_ndevs; i++) {
@@ -1408,13 +1417,13 @@ static int __init nr_proto_init(void)
sprintf(name, "nr%d", i);
dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
if (!dev) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+ rc = -ENOMEM;
goto fail;
}
dev->base_addr = i;
- if (register_netdev(dev)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+ rc = register_netdev(dev);
+ if (rc) {
free_netdev(dev);
goto fail;
}
@@ -1422,36 +1431,64 @@ static int __init nr_proto_init(void)
dev_nr[i] = dev;
}
- if (sock_register(&nr_family_ops)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+ rc = sock_register(&nr_family_ops);
+ if (rc)
goto fail;
- }
- register_netdevice_notifier(&nr_dev_notifier);
+ rc = register_netdevice_notifier(&nr_dev_notifier);
+ if (rc)
+ goto out_sock;
ax25_register_pid(&nr_pid);
ax25_linkfail_register(&nr_linkfail_notifier);
#ifdef CONFIG_SYSCTL
- nr_register_sysctl();
+ rc = nr_register_sysctl();
+ if (rc)
+ goto out_sysctl;
#endif
nr_loopback_init();
- proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
- proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
- proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
-out:
- return rc;
+ rc = -ENOMEM;
+ if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
+ goto proc_remove1;
+ if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
+ &nr_neigh_seqops))
+ goto proc_remove2;
+ if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
+ &nr_node_seqops))
+ goto proc_remove3;
+
+ return 0;
+
+proc_remove3:
+ remove_proc_entry("nr_neigh", init_net.proc_net);
+proc_remove2:
+ remove_proc_entry("nr", init_net.proc_net);
+proc_remove1:
+
+ nr_loopback_clear();
+ nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+ nr_unregister_sysctl();
+out_sysctl:
+#endif
+ ax25_linkfail_release(&nr_linkfail_notifier);
+ ax25_protocol_release(AX25_P_NETROM);
+ unregister_netdevice_notifier(&nr_dev_notifier);
+out_sock:
+ sock_unregister(PF_NETROM);
fail:
while (--i >= 0) {
unregister_netdev(dev_nr[i]);
free_netdev(dev_nr[i]);
}
kfree(dev_nr);
+unregister_proto:
proto_unregister(&nr_proto);
- rc = -1;
- goto out;
+ return rc;
}
module_init(nr_proto_init);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 215ad22a9647..93d13f019981 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused)
}
}
-void __exit nr_loopback_clear(void)
+void nr_loopback_clear(void)
{
del_timer_sync(&loopback_timer);
skb_queue_purge(&loopback_queue);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 6485f593e2f0..b76aa668a94b 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = {
/*
* Free all memory associated with the nodes and routes lists.
*/
-void __exit nr_rt_free(void)
+void nr_rt_free(void)
{
struct nr_neigh *s = NULL;
struct nr_node *t = NULL;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index cbd51ed5a2d7..908e53ab47a4 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- mod_timer(&nr->t1timer, jiffies + nr->t1);
+ sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
}
void nr_start_t2timer(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- mod_timer(&nr->t2timer, jiffies + nr->t2);
+ sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
}
void nr_start_t4timer(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- mod_timer(&nr->t4timer, jiffies + nr->t4);
+ sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
}
void nr_start_idletimer(struct sock *sk)
@@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk)
struct nr_sock *nr = nr_sk(sk);
if (nr->idle > 0)
- mod_timer(&nr->idletimer, jiffies + nr->idle);
+ sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
}
void nr_start_heartbeat(struct sock *sk)
{
- mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
}
void nr_stop_t1timer(struct sock *sk)
{
- del_timer(&nr_sk(sk)->t1timer);
+ sk_stop_timer(sk, &nr_sk(sk)->t1timer);
}
void nr_stop_t2timer(struct sock *sk)
{
- del_timer(&nr_sk(sk)->t2timer);
+ sk_stop_timer(sk, &nr_sk(sk)->t2timer);
}
void nr_stop_t4timer(struct sock *sk)
{
- del_timer(&nr_sk(sk)->t4timer);
+ sk_stop_timer(sk, &nr_sk(sk)->t4timer);
}
void nr_stop_idletimer(struct sock *sk)
{
- del_timer(&nr_sk(sk)->idletimer);
+ sk_stop_timer(sk, &nr_sk(sk)->idletimer);
}
void nr_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
int nr_t1timer_running(struct sock *sk)
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index ba1c368b3f18..771011b84270 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = {
{ }
};
-void __init nr_register_sysctl(void)
+int __init nr_register_sysctl(void)
{
nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
+ if (!nr_table_header)
+ return -ENOMEM;
+ return 0;
}
void nr_unregister_sysctl(void)
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 6a196e438b6c..d1fc019e932e 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
sock->service_name,
sock->service_name_len,
&service_name_tlv_length);
+ if (!service_name_tlv) {
+ err = -ENOMEM;
+ goto error_tlv;
+ }
size += service_name_tlv_length;
}
@@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
&miux_tlv_length);
+ if (!miux_tlv) {
+ err = -ENOMEM;
+ goto error_tlv;
+ }
size += miux_tlv_length;
rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+ if (!rw_tlv) {
+ err = -ENOMEM;
+ goto error_tlv;
+ }
size += rw_tlv_length;
pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -484,9 +496,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
&miux_tlv_length);
+ if (!miux_tlv) {
+ err = -ENOMEM;
+ goto error_tlv;
+ }
size += miux_tlv_length;
rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+ if (!rw_tlv) {
+ err = -ENOMEM;
+ goto error_tlv;
+ }
size += rw_tlv_length;
skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index ef4026a23e80..4fa015208aab 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local)
static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
{
- u8 *gb_cur, *version_tlv, version, version_length;
- u8 *lto_tlv, lto_length;
- u8 *wks_tlv, wks_length;
- u8 *miux_tlv, miux_length;
+ u8 *gb_cur, version, version_length;
+ u8 lto_length, wks_length, miux_length;
+ u8 *version_tlv = NULL, *lto_tlv = NULL,
+ *wks_tlv = NULL, *miux_tlv = NULL;
__be16 wks = cpu_to_be16(local->local_wks);
u8 gb_len = 0;
int ret = 0;
@@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
version = LLCP_VERSION_11;
version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
1, &version_length);
+ if (!version_tlv) {
+ ret = -ENOMEM;
+ goto out;
+ }
gb_len += version_length;
lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length);
+ if (!lto_tlv) {
+ ret = -ENOMEM;
+ goto out;
+ }
gb_len += lto_length;
pr_debug("Local wks 0x%lx\n", local->local_wks);
wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length);
+ if (!wks_tlv) {
+ ret = -ENOMEM;
+ goto out;
+ }
gb_len += wks_length;
miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
&miux_length);
+ if (!miux_tlv) {
+ ret = -ENOMEM;
+ goto out;
+ }
gb_len += miux_length;
gb_len += ARRAY_SIZE(llcp_magic);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ddfc52ac1f9b..c0d323b58e73 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
create_info = (struct nci_hci_create_pipe_resp *)skb->data;
dest_gate = create_info->dest_gate;
new_pipe = create_info->pipe;
+ if (new_pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
/* Save the new created pipe and bind with local gate,
* the description for skb->data[3] is destination gate id
@@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
goto exit;
}
delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
+ if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
ndev->hci_dev->pipes[delete_info->pipe].gate =
NCI_HCI_INVALID_GATE;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 865ecef68196..eab5e8eaddaa 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL;
}
- if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+ if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
attrs |= 1 << type;
a[type] = nla;
}
@@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
struct sw_flow_actions *acts;
int new_acts_size;
- int req_size = NLA_ALIGN(attr_len);
+ size_t req_size = NLA_ALIGN(attr_len);
int next_offset = offsetof(struct sw_flow_actions, actions) +
(*sfa)->actions_len;
if (req_size <= (ksize(*sfa) - next_offset))
goto out;
- new_acts_size = ksize(*sfa) * 2;
+ new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0c6e031a38b6..e99b69846aeb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2604,8 +2604,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
void *ph;
DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+ unsigned char *addr = NULL;
int tp_len, size_max;
- unsigned char *addr;
void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
@@ -2616,7 +2616,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2626,8 +2625,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+ if (po->sk.sk_socket->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_put;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -2799,7 +2803,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- unsigned char *addr;
+ unsigned char *addr = NULL;
int err, reserve = 0;
struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2816,7 +2820,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2824,8 +2827,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
+ if (sock->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_unlock;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -2884,7 +2892,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
} else if (reserve) {
skb_reserve(skb, -reserve);
- if (len < reserve)
+ if (len < reserve + sizeof(struct ipv6hdr) &&
+ dev->min_header_len != dev->hard_header_len)
skb_reset_network_header(skb);
}
@@ -3009,8 +3018,8 @@ static int packet_release(struct socket *sock)
synchronize_net();
+ kfree(po->rollover);
if (f) {
- kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -3241,7 +3250,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
}
mutex_lock(&net->packet.sklist_lock);
- sk_add_node_rcu(sk, &net->packet.sklist);
+ sk_add_node_tail_rcu(sk, &net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
@@ -4190,7 +4199,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
struct pgv *pg_vec;
int i;
- pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
+ pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!pg_vec))
goto out;
@@ -4271,7 +4280,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
if (unlikely(rb->frames_per_block == 0))
goto out;
- if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+ if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
@@ -4570,14 +4579,29 @@ static void __exit packet_exit(void)
static int __init packet_init(void)
{
- int rc = proto_register(&packet_proto, 0);
+ int rc;
- if (rc != 0)
+ rc = proto_register(&packet_proto, 0);
+ if (rc)
goto out;
+ rc = sock_register(&packet_family_ops);
+ if (rc)
+ goto out_proto;
+ rc = register_pernet_subsys(&packet_net_ops);
+ if (rc)
+ goto out_sock;
+ rc = register_netdevice_notifier(&packet_netdev_notifier);
+ if (rc)
+ goto out_pernet;
+
+ return 0;
- sock_register(&packet_family_ops);
- register_pernet_subsys(&packet_net_ops);
- register_netdevice_notifier(&packet_netdev_notifier);
+out_pernet:
+ unregister_pernet_subsys(&packet_net_ops);
+out_sock:
+ sock_unregister(PF_PACKET);
+out_proto:
+ proto_unregister(&packet_proto);
out:
return rc;
}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 9fc76b19cd3c..db3473540303 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code,
ph->utid = 0;
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = code;
+ ph->error_code = code;
return pn_skb_send(sk, skb, NULL);
}
@@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
ph->utid = id; /* whatever */
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = code;
+ ph->error_code = code;
return pn_skb_send(sk, skb, NULL);
}
@@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
struct pnpipehdr *ph;
struct sockaddr_pn dst;
u8 data[4] = {
- oph->data[0], /* PEP type */
+ oph->pep_type, /* PEP type */
code, /* error code, at an unusual offset */
PAD, PAD,
};
@@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
ph->utid = oph->utid;
ph->message_id = PNS_PEP_CTRL_RESP;
ph->pipe_handle = oph->pipe_handle;
- ph->data[0] = oph->data[1]; /* CTRL id */
+ ph->data0 = oph->data[0]; /* CTRL id */
pn_skb_get_src_sockaddr(oskb, &dst);
return pn_skb_send(sk, skb, &dst);
@@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
return -EINVAL;
hdr = pnp_hdr(skb);
- if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+ if (hdr->pep_type != PN_PEP_TYPE_COMMON) {
net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
- (unsigned int)hdr->data[0]);
+ (unsigned int)hdr->pep_type);
return -EOPNOTSUPP;
}
- switch (hdr->data[1]) {
+ switch (hdr->data[0]) {
case PN_PEP_IND_FLOW_CONTROL:
switch (pn->tx_fc) {
case PN_LEGACY_FLOW_CONTROL:
- switch (hdr->data[4]) {
+ switch (hdr->data[3]) {
case PEP_IND_BUSY:
atomic_set(&pn->tx_credits, 0);
break;
@@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
}
break;
case PN_ONE_CREDIT_FLOW_CONTROL:
- if (hdr->data[4] == PEP_IND_READY)
+ if (hdr->data[3] == PEP_IND_READY)
atomic_set(&pn->tx_credits, wake = 1);
break;
}
@@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
break;
- atomic_add(wake = hdr->data[4], &pn->tx_credits);
+ atomic_add(wake = hdr->data[3], &pn->tx_credits);
break;
default:
net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
- (unsigned int)hdr->data[1]);
+ (unsigned int)hdr->data[0]);
return -EOPNOTSUPP;
}
if (wake)
@@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr = pnp_hdr(skb);
- u8 n_sb = hdr->data[0];
+ u8 n_sb = hdr->data0;
pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
__skb_pull(skb, sizeof(*hdr));
@@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
return -ECONNREFUSED;
/* Parse sub-blocks */
- n_sb = hdr->data[4];
+ n_sb = hdr->data[3];
while (n_sb > 0) {
u8 type, buf[6], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk)
ph->utid = 0;
ph->message_id = PNS_PIPE_REMOVE_REQ;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = PAD;
+ ph->data0 = PAD;
return pn_skb_send(sk, skb, NULL);
}
@@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
peer_type = hdr->other_pep_type << 8;
/* Parse sub-blocks (options) */
- n_sb = hdr->data[4];
+ n_sb = hdr->data[3];
while (n_sb > 0) {
u8 type, buf[1], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
ph->utid = 0;
if (pn->aligned) {
ph->message_id = PNS_PIPE_ALIGNED_DATA;
- ph->data[0] = 0; /* padding */
+ ph->data0 = 0; /* padding */
} else
ph->message_id = PNS_PIPE_DATA;
ph->pipe_handle = pn->pipe_handle;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 65387e1e6964..cd7e01ea8144 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -506,6 +506,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
+
lock_sock(sk);
switch (uaddr->sa_family) {
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 762d2c6788a3..0f4398e7f2a7 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
__rds_create_bind_key(key, addr, port, scope_id);
rcu_read_lock();
rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
- if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
- rds_sock_addref(rs);
- else
+ if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) ||
+ !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))
rs = NULL;
+
rcu_read_unlock();
rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
@@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* We allow an RDS socket to be bound to either IPv4 or IPv6
* address.
*/
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index e0f70c4051b6..01e764f8f224 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
else
pool = rds_ibdev->mr_1m_pool;
+ if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+ queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+ /* Switch pools if one of the pool is reaching upper limit */
+ if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ pool = rds_ibdev->mr_1m_pool;
+ else
+ pool = rds_ibdev->mr_8k_pool;
+ }
+
ibmr = rds_ib_try_reuse_ibmr(pool);
if (ibmr)
return ibmr;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 671f8ad38864..f395f06031bc 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -429,12 +429,14 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
wait_clean_list_grace();
list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
- if (ibmr_ret)
+ if (ibmr_ret) {
*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
-
+ clean_nodes = clean_nodes->next;
+ }
/* more than one entry in llist nodes */
- if (clean_nodes->next)
- llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
+ if (clean_nodes)
+ llist_add_batch(clean_nodes, clean_tail,
+ &pool->clean_list);
}
@@ -455,9 +457,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
struct rds_ib_mr *ibmr = NULL;
int iter = 0;
- if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
- queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
while (1) {
ibmr = rds_ib_reuse_mr(pool);
if (ibmr)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 98237feb607a..e1965d9cbcf8 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -517,9 +517,10 @@ static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
return tot_pages;
}
-int rds_rdma_extra_size(struct rds_rdma_args *args)
+int rds_rdma_extra_size(struct rds_rdma_args *args,
+ struct rds_iov_vector *iov)
{
- struct rds_iovec vec;
+ struct rds_iovec *vec;
struct rds_iovec __user *local_vec;
int tot_pages = 0;
unsigned int nr_pages;
@@ -530,13 +531,23 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
if (args->nr_local == 0)
return -EINVAL;
+ iov->iov = kcalloc(args->nr_local,
+ sizeof(struct rds_iovec),
+ GFP_KERNEL);
+ if (!iov->iov)
+ return -ENOMEM;
+
+ vec = &iov->iov[0];
+
+ if (copy_from_user(vec, local_vec, args->nr_local *
+ sizeof(struct rds_iovec)))
+ return -EFAULT;
+ iov->len = args->nr_local;
+
/* figure out the number of pages in the vector */
- for (i = 0; i < args->nr_local; i++) {
- if (copy_from_user(&vec, &local_vec[i],
- sizeof(struct rds_iovec)))
- return -EFAULT;
+ for (i = 0; i < args->nr_local; i++, vec++) {
- nr_pages = rds_pages_in_vec(&vec);
+ nr_pages = rds_pages_in_vec(vec);
if (nr_pages == 0)
return -EINVAL;
@@ -558,15 +569,15 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
* Extract all arguments and set up the rdma_op
*/
int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
- struct cmsghdr *cmsg)
+ struct cmsghdr *cmsg,
+ struct rds_iov_vector *vec)
{
struct rds_rdma_args *args;
struct rm_rdma_op *op = &rm->rdma;
int nr_pages;
unsigned int nr_bytes;
struct page **pages = NULL;
- struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
- int iov_size;
+ struct rds_iovec *iovs;
unsigned int i, j;
int ret = 0;
@@ -586,31 +597,23 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out_ret;
}
- /* Check whether to allocate the iovec area */
- iov_size = args->nr_local * sizeof(struct rds_iovec);
- if (args->nr_local > UIO_FASTIOV) {
- iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
- if (!iovs) {
- ret = -ENOMEM;
- goto out_ret;
- }
+ if (vec->len != args->nr_local) {
+ ret = -EINVAL;
+ goto out_ret;
}
- if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
- ret = -EFAULT;
- goto out;
- }
+ iovs = vec->iov;
nr_pages = rds_rdma_pages(iovs, args->nr_local);
if (nr_pages < 0) {
ret = -EINVAL;
- goto out;
+ goto out_ret;
}
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
- goto out;
+ goto out_ret;
}
op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
@@ -623,7 +626,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
if (!op->op_sg) {
ret = -ENOMEM;
- goto out;
+ goto out_pages;
}
if (op->op_notify || op->op_recverr) {
@@ -635,7 +638,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
if (!op->op_notifier) {
ret = -ENOMEM;
- goto out;
+ goto out_pages;
}
op->op_notifier->n_user_token = args->user_token;
op->op_notifier->n_status = RDS_RDMA_SUCCESS;
@@ -681,7 +684,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
*/
ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
if (ret < 0)
- goto out;
+ goto out_pages;
else
ret = 0;
@@ -714,13 +717,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
nr_bytes,
(unsigned int) args->remote_vec.bytes);
ret = -EINVAL;
- goto out;
+ goto out_pages;
}
op->op_bytes = nr_bytes;
-out:
- if (iovs != iovstack)
- sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
+out_pages:
kfree(pages);
out_ret:
if (ret)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c4dcf654d8fe..4234ab81b5af 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -386,6 +386,18 @@ static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
INIT_LIST_HEAD(&q->zcookie_head);
}
+struct rds_iov_vector {
+ struct rds_iovec *iov;
+ int len;
+};
+
+struct rds_iov_vector_arr {
+ struct rds_iov_vector *vec;
+ int len;
+ int indx;
+ int incr;
+};
+
struct rds_message {
refcount_t m_refcount;
struct list_head m_sock_item;
@@ -904,13 +916,13 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
void rds_rdma_drop_keys(struct rds_sock *rs);
-int rds_rdma_extra_size(struct rds_rdma_args *args);
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
- struct cmsghdr *cmsg);
+int rds_rdma_extra_size(struct rds_rdma_args *args,
+ struct rds_iov_vector *iov);
int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg);
int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
- struct cmsghdr *cmsg);
+ struct cmsghdr *cmsg,
+ struct rds_iov_vector *vec);
int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg);
void rds_rdma_free_op(struct rm_rdma_op *ro);
diff --git a/net/rds/send.c b/net/rds/send.c
index fe785ee819dd..ec2267cbf85f 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -876,13 +876,15 @@ out:
* rds_message is getting to be quite complicated, and we'd like to allocate
* it all in one go. This figures out how big it needs to be up front.
*/
-static int rds_rm_size(struct msghdr *msg, int num_sgs)
+static int rds_rm_size(struct msghdr *msg, int num_sgs,
+ struct rds_iov_vector_arr *vct)
{
struct cmsghdr *cmsg;
int size = 0;
int cmsg_groups = 0;
int retval;
bool zcopy_cookie = false;
+ struct rds_iov_vector *iov, *tmp_iov;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
@@ -893,8 +895,24 @@ static int rds_rm_size(struct msghdr *msg, int num_sgs)
switch (cmsg->cmsg_type) {
case RDS_CMSG_RDMA_ARGS:
+ if (vct->indx >= vct->len) {
+ vct->len += vct->incr;
+ tmp_iov =
+ krealloc(vct->vec,
+ vct->len *
+ sizeof(struct rds_iov_vector),
+ GFP_KERNEL);
+ if (!tmp_iov) {
+ vct->len -= vct->incr;
+ return -ENOMEM;
+ }
+ vct->vec = tmp_iov;
+ }
+ iov = &vct->vec[vct->indx];
+ memset(iov, 0, sizeof(struct rds_iov_vector));
+ vct->indx++;
cmsg_groups |= 1;
- retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
+ retval = rds_rdma_extra_size(CMSG_DATA(cmsg), iov);
if (retval < 0)
return retval;
size += retval;
@@ -951,10 +969,11 @@ static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
}
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
- struct msghdr *msg, int *allocated_mr)
+ struct msghdr *msg, int *allocated_mr,
+ struct rds_iov_vector_arr *vct)
{
struct cmsghdr *cmsg;
- int ret = 0;
+ int ret = 0, ind = 0;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
@@ -968,7 +987,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
*/
switch (cmsg->cmsg_type) {
case RDS_CMSG_RDMA_ARGS:
- ret = rds_cmsg_rdma_args(rs, rm, cmsg);
+ if (ind >= vct->indx)
+ return -ENOMEM;
+ ret = rds_cmsg_rdma_args(rs, rm, cmsg, &vct->vec[ind]);
+ ind++;
break;
case RDS_CMSG_RDMA_DEST:
@@ -1084,6 +1106,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
int num_sgs = ceil(payload_len, PAGE_SIZE);
int namelen;
+ struct rds_iov_vector_arr vct = {0};
+ int ind;
+
+ /* expect 1 RDMA CMSG per rds_sendmsg. can still grow if more needed. */
+ vct.incr = 1;
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
@@ -1220,7 +1247,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
}
/* size of rm including all sgs */
- ret = rds_rm_size(msg, num_sgs);
+ ret = rds_rm_size(msg, num_sgs, &vct);
if (ret < 0)
goto out;
@@ -1270,7 +1297,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
rm->m_conn_path = cpath;
/* Parse any control messages the user may have included. */
- ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
+ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
if (ret) {
/* Trigger connection so that its ready for the next retry */
if (ret == -EAGAIN)
@@ -1348,9 +1375,18 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
if (ret)
goto out;
rds_message_put(rm);
+
+ for (ind = 0; ind < vct.indx; ind++)
+ kfree(vct.vec[ind].iov);
+ kfree(vct.vec);
+
return payload_len;
out:
+ for (ind = 0; ind < vct.indx; ind++)
+ kfree(vct.vec[ind].iov);
+ kfree(vct.vec);
+
/* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly.
* If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
* or in any other way, we need to destroy the MR again */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index b9bbcf3d6c63..18bb522df282 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -600,7 +600,7 @@ static void rds_tcp_kill_sock(struct net *net)
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
- if (net != c_net || !tc->t_sock)
+ if (net != c_net)
continue;
if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list);
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 7af4f99c4a93..094a6621f8e8 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
@@ -35,29 +36,27 @@ static int rose_loopback_running(void)
int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
{
- struct sk_buff *skbn;
+ struct sk_buff *skbn = NULL;
- skbn = skb_clone(skb, GFP_ATOMIC);
+ if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+ skbn = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(skb);
-
- if (skbn != NULL) {
+ if (skbn) {
+ consume_skb(skb);
skb_queue_tail(&loopback_queue, skbn);
if (!rose_loopback_running())
rose_set_loopback_timer();
+ } else {
+ kfree_skb(skb);
}
return 1;
}
-
static void rose_set_loopback_timer(void)
{
- del_timer(&loopback_timer);
-
- loopback_timer.expires = jiffies + 10;
- add_timer(&loopback_timer);
+ mod_timer(&loopback_timer, jiffies + 10);
}
static void rose_loopback_timer(struct timer_list *unused)
@@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused)
struct sock *sk;
unsigned short frametype;
unsigned int lci_i, lci_o;
+ int count;
- while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+ for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+ skb = skb_dequeue(&loopback_queue);
+ if (!skb)
+ return;
if (skb->len < ROSE_MIN_LEN) {
kfree_skb(skb);
continue;
@@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused)
kfree_skb(skb);
}
}
+ if (!skb_queue_empty(&loopback_queue))
+ mod_timer(&loopback_timer, jiffies + 1);
}
void __exit rose_loopback_clear(void)
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 77e9f85a2c92..f2ff21d7df08 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -850,6 +850,7 @@ void rose_link_device_down(struct net_device *dev)
/*
* Route a frame to an appropriate AX.25 connection.
+ * A NULL ax25_cb indicates an internally generated frame.
*/
int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
{
@@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (skb->len < ROSE_MIN_LEN)
return res;
+
+ if (!ax25)
+ return rose_loopback_queue(skb, NULL);
+
frametype = skb->data[2];
lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
if (frametype == ROSE_CALL_REQUEST &&
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7ca57741b2fb..7849f286bb93 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype)
struct sk_buff *skb;
unsigned char *dptr;
unsigned char lci1, lci2;
- char buffer[100];
- int len, faclen = 0;
+ int maxfaclen = 0;
+ int len, faclen;
+ int reserve;
- len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+ reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1;
+ len = ROSE_MIN_LEN;
switch (frametype) {
case ROSE_CALL_REQUEST:
len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
- faclen = rose_create_facilities(buffer, rose);
- len += faclen;
+ maxfaclen = 256;
break;
case ROSE_CALL_ACCEPTED:
case ROSE_CLEAR_REQUEST:
@@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype)
break;
}
- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+ skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC);
+ if (!skb)
return;
/*
* Space for AX.25 header and PID.
*/
- skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1);
+ skb_reserve(skb, reserve);
- dptr = skb_put(skb, skb_tailroom(skb));
+ dptr = skb_put(skb, len);
lci1 = (rose->lci >> 8) & 0x0F;
lci2 = (rose->lci >> 0) & 0xFF;
@@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype)
dptr += ROSE_ADDR_LEN;
memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
dptr += ROSE_ADDR_LEN;
- memcpy(dptr, buffer, faclen);
+ faclen = rose_create_facilities(dptr, rose);
+ skb_put(skb, faclen);
dptr += faclen;
break;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8f1a8f85b1f9..215f4d98baa0 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -701,30 +701,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
- if (list_empty(&rxnet->calls))
- return;
+ if (!list_empty(&rxnet->calls)) {
+ write_lock(&rxnet->call_lock);
- write_lock(&rxnet->call_lock);
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next,
+ struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
- while (!list_empty(&rxnet->calls)) {
- call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
- _debug("Zapping call %p", call);
+ rxrpc_see_call(call);
+ list_del_init(&call->link);
- rxrpc_see_call(call);
- list_del_init(&call->link);
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
- pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
- rxrpc_call_states[call->state],
- call->flags, call->events);
+ write_unlock(&rxnet->call_lock);
+ cond_resched();
+ write_lock(&rxnet->call_lock);
+ }
write_unlock(&rxnet->call_lock);
- cond_resched();
- write_lock(&rxnet->call_lock);
}
- write_unlock(&rxnet->call_lock);
-
atomic_dec(&rxnet->nr_calls);
wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 521189f4b666..c979a56faaef 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -353,7 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
* normally have to take channel_lock but we do this before anyone else
* can see the connection.
*/
- list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+ list_add(&call->chan_wait_link, &candidate->waiting_calls);
if (cp->exclusive) {
call->conn = candidate;
@@ -432,7 +432,7 @@ found_extant_conn:
call->conn = conn;
call->security_ix = conn->security_ix;
call->service_id = conn->service_id;
- list_add(&call->chan_wait_link, &conn->waiting_calls);
+ list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
spin_unlock(&conn->channel_lock);
_leave(" = 0 [extant %d]", conn->debug_id);
return 0;
@@ -707,6 +707,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
ret = rxrpc_wait_for_channel(call, gfp);
if (ret < 0) {
+ trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
rxrpc_disconnect_client_call(call);
goto out;
}
@@ -777,16 +778,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
*/
void rxrpc_disconnect_client_call(struct rxrpc_call *call)
{
- unsigned int channel = call->cid & RXRPC_CHANNELMASK;
struct rxrpc_connection *conn = call->conn;
- struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_channel *chan = NULL;
struct rxrpc_net *rxnet = conn->params.local->rxnet;
+ unsigned int channel = -1;
+ u32 cid;
+ spin_lock(&conn->channel_lock);
+
+ cid = call->cid;
+ if (cid) {
+ channel = cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+ }
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
call->conn = NULL;
- spin_lock(&conn->channel_lock);
-
/* Calls that have never actually been assigned a channel can simply be
* discarded. If the conn didn't get used either, it will follow
* immediately unless someone else grabs it in the meantime.
@@ -810,7 +817,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
goto out;
}
- ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+ if (rcu_access_pointer(chan->call) != call) {
+ spin_unlock(&conn->channel_lock);
+ BUG();
+ }
/* If a client call was exposed to the world, we save the result for
* retransmission.
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 570b49d2da42..d591f54cb91f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1155,19 +1155,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
* handle data received on the local endpoint
* - may be called in interrupt context
*
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
*
* Called with the RCU read lock held from the IP layer via UDP.
*/
int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
struct rxrpc_connection *conn;
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
struct rxrpc_skb_priv *sp;
- struct rxrpc_local *local = udp_sk->sk_user_data;
struct rxrpc_peer *peer = NULL;
struct rxrpc_sock *rx = NULL;
unsigned int channel;
@@ -1175,6 +1175,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
_enter("%p", udp_sk);
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
if (skb->tstamp == 0)
skb->tstamp = ktime_get_real();
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 0906e51d3cfb..10317dbdab5f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -304,7 +304,8 @@ nomem:
ret = -ENOMEM;
sock_error:
mutex_unlock(&rxnet->local_mutex);
- kfree(local);
+ if (local)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
_leave(" = %d", ret);
return ERR_PTR(ret);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 816b19a78809..0374b0623c8b 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -596,6 +596,7 @@ error_requeue_call:
}
error_no_call:
release_sock(&rx->sk);
+error_trace:
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
return ret;
@@ -604,7 +605,7 @@ wait_interrupted:
wait_error:
finish_wait(sk_sleep(&rx->sk), &wait);
call = NULL;
- goto error_no_call;
+ goto error_trace;
}
/**
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e12f8ef7baa4..7c4a4b874248 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -744,7 +744,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
a = actions[i];
- nest = nla_nest_start(skb, a->order);
+ nest = nla_nest_start(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_1(skb, a, bind, ref);
@@ -1257,7 +1257,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
ret = PTR_ERR(act);
goto err;
}
- act->order = i;
attr_size += tcf_action_fill_size(act);
actions[i - 1] = act;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8525de811616..334f3a057671 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -199,8 +199,7 @@ err3:
err2:
kfree(tname);
err1:
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return err;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8bf66d0a6800..f767e78e38c9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -159,6 +159,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
m = to_mirred(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&m->tcfm_list);
+
spin_lock_bh(&m->tcf_lock);
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 6b67aa13d2dd..c7f5d630d97c 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -43,8 +43,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
+ u32 psample_group_num, rate;
struct tc_sample *parm;
- u32 psample_group_num;
struct tcf_sample *s;
bool exists = false;
int ret, err;
@@ -80,6 +80,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ if (!rate) {
+ NL_SET_ERR_MSG(extack, "invalid sample rate");
+ tcf_idr_release(*a, bind);
+ return -EINVAL;
+ }
psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
psample_group = psample_group_get(net, psample_group_num);
if (!psample_group) {
@@ -91,7 +97,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&s->tcf_lock);
s->tcf_action = parm->action;
- s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ s->rate = rate;
s->psample_group_num = psample_group_num;
RCU_INIT_POINTER(s->psample_group, psample_group);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 73e44ce2a883..86d90fc5e97e 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -191,8 +191,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- if (ret == ACT_P_CREATED)
- tcf_idr_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 681f6f04e7da..72d9c432e8b4 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -197,6 +197,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 },
};
+static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
+{
+ if (!p)
+ return;
+ if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ dst_release(&p->tcft_enc_metadata->dst);
+ kfree_rcu(p, rcu);
+}
+
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
@@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
spin_unlock_bh(&t->tcf_lock);
- if (params_new)
- kfree_rcu(params_new, rcu);
+ tunnel_key_release_params(params_new);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -369,7 +377,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return ret;
release_tun_meta:
- dst_release(&metadata->dst);
+ if (metadata)
+ dst_release(&metadata->dst);
err_out:
if (exists)
@@ -385,12 +394,7 @@ static void tunnel_key_release(struct tc_action *a)
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
- if (params) {
- if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
- dst_release(&params->tcft_enc_metadata->dst);
-
- kfree_rcu(params, rcu);
- }
+ tunnel_key_release_params(params);
}
static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 70f144ac5e1d..2167c6ca55e3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -960,7 +960,6 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
- __be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
const int max_reclassify_loop = 4;
const struct tcf_proto *orig_tp = tp;
@@ -970,6 +969,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
reclassify:
#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ __be16 protocol = tc_skb_protocol(skb);
int err;
if (tp->protocol != protocol &&
@@ -1002,7 +1002,6 @@ reset:
}
tp = first_tp;
- protocol = tc_skb_protocol(skb);
goto reclassify;
#endif
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7fade7107f95..09b359784629 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1176,17 +1176,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
+ struct fl_flow_mask *mask;
struct nlattr **tb;
- struct fl_flow_mask mask = {};
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
- tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
- if (!tb)
+ mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
+ if (!mask)
return -ENOBUFS;
+ tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+ if (!tb) {
+ err = -ENOBUFS;
+ goto errout_mask_alloc;
+ }
+
err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
fl_policy, NULL);
if (err < 0)
@@ -1207,47 +1213,47 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- if (!handle) {
- handle = 1;
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- INT_MAX, GFP_KERNEL);
- } else if (!fold) {
- /* user specifies a handle and it doesn't exist */
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- handle, GFP_KERNEL);
- }
- if (err)
- goto errout;
- fnew->handle = handle;
-
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
if (!tc_flags_valid(fnew->flags)) {
err = -EINVAL;
- goto errout_idr;
+ goto errout;
}
}
- err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
+ err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
tp->chain->tmplt_priv, extack);
if (err)
- goto errout_idr;
+ goto errout;
+
+ err = fl_check_assign_mask(head, fnew, fold, mask);
+ if (err)
+ goto errout;
- err = fl_check_assign_mask(head, fnew, fold, &mask);
+ if (!handle) {
+ handle = 1;
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ INT_MAX, GFP_KERNEL);
+ } else if (!fold) {
+ /* user specifies a handle and it doesn't exist */
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ handle, GFP_KERNEL);
+ }
if (err)
- goto errout_idr;
+ goto errout_mask;
+ fnew->handle = handle;
if (!tc_skip_sw(fnew->flags)) {
if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
err = -EEXIST;
- goto errout_mask;
+ goto errout_idr;
}
err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
if (err)
- goto errout_mask;
+ goto errout_idr;
}
if (!tc_skip_hw(fnew->flags)) {
@@ -1281,19 +1287,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
kfree(tb);
+ kfree(mask);
return 0;
-errout_mask:
- fl_mask_put(head, fnew->mask, false);
-
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
+
+errout_mask:
+ fl_mask_put(head, fnew->mask, false);
+
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
errout_tb:
kfree(tb);
+errout_mask_alloc:
+ kfree(mask);
return err;
}
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 856fa79d4ffd..621bc1d5b057 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -126,6 +126,11 @@ static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
static void *mall_get(struct tcf_proto *tp, u32 handle)
{
+ struct cls_mall_head *head = rtnl_dereference(tp->root);
+
+ if (head && head->handle == handle)
+ return head;
+
return NULL;
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9ccc93f257db..38bb882bb958 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -48,7 +48,7 @@ struct tcindex_data {
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
- struct rcu_head rcu;
+ struct rcu_work rwork;
};
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
@@ -221,17 +221,11 @@ found:
return 0;
}
-static int tcindex_destroy_element(struct tcf_proto *tp,
- void *arg, struct tcf_walker *walker)
-{
- bool last;
-
- return tcindex_delete(tp, arg, &last, NULL);
-}
-
-static void __tcindex_destroy(struct rcu_head *head)
+static void tcindex_destroy_work(struct work_struct *work)
{
- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+ struct tcindex_data *p = container_of(to_rcu_work(work),
+ struct tcindex_data,
+ rwork);
kfree(p->perfect);
kfree(p->h);
@@ -258,9 +252,11 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r)
return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
-static void __tcindex_partial_destroy(struct rcu_head *head)
+static void tcindex_partial_destroy_work(struct work_struct *work)
{
- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+ struct tcindex_data *p = container_of(to_rcu_work(work),
+ struct tcindex_data,
+ rwork);
kfree(p->perfect);
kfree(p);
@@ -275,7 +271,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp)
kfree(cp->perfect);
}
-static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
{
int i, err = 0;
@@ -289,6 +285,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
+#ifdef CONFIG_NET_CLS_ACT
+ cp->perfect[i].exts.net = net;
+#endif
}
return 0;
@@ -305,9 +304,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
- struct tcindex_filter_result cr;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
+ struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
@@ -337,7 +336,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (p->perfect) {
int i;
- if (tcindex_alloc_perfect_hash(cp) < 0)
+ if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
for (i = 0; i < cp->hash; i++)
cp->perfect[i].res = p->perfect[i].res;
@@ -348,11 +347,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result);
if (err < 0)
goto errout1;
- err = tcindex_filter_result_init(&cr);
- if (err < 0)
- goto errout1;
if (old_r)
- cr.res = r->res;
+ cr = r->res;
if (tb[TCA_TCINDEX_HASH])
cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -406,7 +402,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp->perfect && !cp->h) {
if (valid_perfect_hash(cp)) {
- if (tcindex_alloc_perfect_hash(cp) < 0)
+ if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout_alloc;
balloc = 1;
} else {
@@ -443,8 +439,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (tb[TCA_TCINDEX_CLASSID]) {
- cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
- tcf_bind_filter(tp, &cr.res, base);
+ cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+ tcf_bind_filter(tp, &cr, base);
}
if (old_r && old_r != r) {
@@ -456,7 +452,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
oldp = p;
- r->res = cr.res;
+ r->res = cr;
tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp);
@@ -475,10 +471,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
; /* nothing */
rcu_assign_pointer(*fp, f);
+ } else {
+ tcf_exts_destroy(&new_filter_result.exts);
}
if (oldp)
- call_rcu(&oldp->rcu, __tcindex_partial_destroy);
+ tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
return 0;
errout_alloc:
@@ -487,7 +485,6 @@ errout_alloc:
else if (balloc == 2)
kfree(cp->h);
errout1:
- tcf_exts_destroy(&cr.exts);
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
@@ -562,15 +559,34 @@ static void tcindex_destroy(struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcf_walker walker;
+ int i;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
- walker.count = 0;
- walker.skip = 0;
- walker.fn = tcindex_destroy_element;
- tcindex_walk(tp, &walker);
- call_rcu(&p->rcu, __tcindex_destroy);
+ if (p->perfect) {
+ for (i = 0; i < p->hash; i++) {
+ struct tcindex_filter_result *r = p->perfect + i;
+
+ tcf_unbind_filter(tp, &r->res);
+ if (tcf_exts_get_net(&r->exts))
+ tcf_queue_work(&r->rwork,
+ tcindex_destroy_rexts_work);
+ else
+ __tcindex_destroy_rexts(r);
+ }
+ }
+
+ for (i = 0; p->h && i < p->hash; i++) {
+ struct tcindex_filter *f, *next;
+ bool last;
+
+ for (f = rtnl_dereference(p->h[i]); f; f = next) {
+ next = rtnl_dereference(f->next);
+ tcindex_delete(tp, &f->result, &last, NULL);
+ }
+ }
+
+ tcf_queue_work(&p->rwork, tcindex_destroy_work);
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 793016d722ec..9fd37d91b5ed 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1508,32 +1508,29 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static void cake_wash_diffserv(struct sk_buff *skb)
-{
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
- break;
- default:
- break;
- }
-}
-
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{
+ int wlen = skb_network_offset(skb);
u8 dscp;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
if (wash && dscp)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
return dscp;
case htons(ETH_P_IPV6):
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
if (wash && dscp)
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
@@ -1553,25 +1550,27 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin;
+ u8 dscp;
+
+ /* Tin selection: Default to diffserv-based selection, allow overriding
+ * using firewall marks or skb->priority.
+ */
+ dscp = cake_handle_diffserv(skb,
+ q->rate_flags & CAKE_FLAG_WASH);
- if (TC_H_MAJ(skb->priority) == sch->handle &&
- TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= q->tin_cnt) {
+ if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+ tin = 0;
+
+ else if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= q->tin_cnt)
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
- } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
- /* extract the Diffserv Precedence field, if it exists */
- /* and clear DSCP bits if washing */
- tin = q->tin_index[cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH)];
+ else {
+ tin = q->tin_index[dscp];
+
if (unlikely(tin >= q->tin_cnt))
tin = 0;
- } else {
- tin = 0;
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
}
return &q->tins[tin];
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cfa761f2d041..b0cc57ff96e3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
@@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
qdisc_qstats_cpu_requeues_inc(q);
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
skb = next;
}
@@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -637,7 +637,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_atomic_qlen_inc(qdisc);
/* Note: skb can not be used after skb_array_produce(),
* so we better not use qdisc_qstats_cpu_backlog_inc()
*/
@@ -662,7 +662,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_qstats_cpu_backlog_dec(qdisc, skb);
qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_cpu_qlen_dec(qdisc);
+ qdisc_qstats_atomic_qlen_dec(qdisc);
}
return skb;
@@ -706,7 +706,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
- q->qlen = 0;
}
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 74c0f656f28c..4dfe10b9f96c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -440,6 +440,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int nb = 0;
int count = 1;
int rc = NET_XMIT_SUCCESS;
+ int rc_drop = NET_XMIT_DROP;
/* Do not fool qdisc_drop_all() */
skb->prev = NULL;
@@ -479,6 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->duplicate = 0;
rootq->enqueue(skb2, rootq, to_free);
q->duplicate = dupsave;
+ rc_drop = NET_XMIT_SUCCESS;
}
/*
@@ -491,7 +493,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (skb_is_gso(skb)) {
segs = netem_segment(skb, sch, to_free);
if (!segs)
- return NET_XMIT_DROP;
+ return rc_drop;
} else {
segs = skb;
}
@@ -514,8 +516,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1<<(prandom_u32() % 8);
}
- if (unlikely(sch->q.qlen >= sch->limit))
- return qdisc_drop_all(skb, sch, to_free);
+ if (unlikely(sch->q.qlen >= sch->limit)) {
+ qdisc_drop_all(skb, sch, to_free);
+ return rc_drop;
+ }
qdisc_qstats_backlog_inc(sch, skb);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 078f01a8d582..435847d98b51 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+ nla_total_size(1) /* INET_DIAG_TOS */
+ nla_total_size(1) /* INET_DIAG_TCLASS */
+ nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
+ nla_total_size(sizeof(struct inet_diag_meminfo))
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fc6c5e4bffa5..4fede55b9010 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -97,10 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
switch (ev) {
case NETDEV_UP:
- addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+ addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
if (addr) {
addr->a.v6.sin6_family = AF_INET6;
- addr->a.v6.sin6_port = 0;
addr->a.v6.sin6_addr = ifa->addr;
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
addr->valid = 1;
@@ -278,7 +277,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if (saddr) {
fl6->saddr = saddr->v6.sin6_addr;
- fl6->fl6_sport = saddr->v6.sin6_port;
+ if (!fl6->fl6_sport)
+ fl6->fl6_sport = saddr->v6.sin6_port;
pr_debug("src=%pI6 - ", &fl6->saddr);
}
@@ -430,7 +430,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
if (addr) {
addr->a.v6.sin6_family = AF_INET6;
- addr->a.v6.sin6_port = 0;
addr->a.v6.sin6_addr = ifp->addr;
addr->a.v6.sin6_scope_id = dev->ifindex;
addr->valid = 1;
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 123e9f2dc226..edfcf16e704c 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
skb->csum_not_inet = 0;
+ gso_reset_checksum(skb, ~0);
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e948db29ab53..d97b2b4b7a8b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -101,7 +101,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
if (addr) {
addr->a.v4.sin_family = AF_INET;
- addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
@@ -441,7 +440,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
if (saddr) {
fl4->saddr = saddr->v4.sin_addr.s_addr;
- fl4->fl4_sport = saddr->v4.sin_port;
+ if (!fl4->fl4_sport)
+ fl4->fl4_sport = saddr->v4.sin_port;
}
pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
@@ -600,6 +600,7 @@ out:
static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
{
/* No address mapping for V4 sockets */
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
return sizeof(struct sockaddr_in);
}
@@ -776,10 +777,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
switch (ev) {
case NETDEV_UP:
- addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+ addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
if (addr) {
addr->a.v4.sin_family = AF_INET;
- addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
addr->valid = 1;
spin_lock_bh(&net->sctp.local_addr_lock);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f4ac6c592e13..ae65a1cfa596 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -495,7 +495,10 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
*
* [INIT ACK back to where the INIT came from.]
*/
- retval->transport = chunk->transport;
+ if (chunk->transport)
+ retval->transport =
+ sctp_assoc_lookup_paddr(asoc,
+ &chunk->transport->ipaddr);
retval->subh.init_hdr =
sctp_addto_chunk(retval, sizeof(initack), &initack);
@@ -642,8 +645,10 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
*
* [COOKIE ACK back to where the COOKIE ECHO came from.]
*/
- if (retval && chunk)
- retval->transport = chunk->transport;
+ if (retval && chunk && chunk->transport)
+ retval->transport =
+ sctp_assoc_lookup_paddr(asoc,
+ &chunk->transport->ipaddr);
return retval;
}
@@ -2324,7 +2329,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
union sctp_addr addr;
struct sctp_af *af;
int src_match = 0;
- char *cookie;
/* We must include the address that the INIT packet came from.
* This is the only address that matters for an INIT packet.
@@ -2428,14 +2432,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
/* Peer Rwnd : Current calculated value of the peer's rwnd. */
asoc->peer.rwnd = asoc->peer.i.a_rwnd;
- /* Copy cookie in case we need to resend COOKIE-ECHO. */
- cookie = asoc->peer.cookie;
- if (cookie) {
- asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp);
- if (!asoc->peer.cookie)
- goto clean_up;
- }
-
/* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily
* high (for example, implementations MAY use the size of the receiver
* advertised window).
@@ -2604,7 +2600,9 @@ do_addr_param:
case SCTP_PARAM_STATE_COOKIE:
asoc->peer.cookie_len =
ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- asoc->peer.cookie = param.cookie->body;
+ asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp);
+ if (!asoc->peer.cookie)
+ retval = 0;
break;
case SCTP_PARAM_HEARTBEAT_INFO:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 85d393090238..3131b4154c74 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -898,6 +898,11 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
asoc->rto_initial;
}
+ if (sctp_state(asoc, ESTABLISHED)) {
+ kfree(asoc->peer.cookie);
+ asoc->peer.cookie = NULL;
+ }
+
if (sctp_state(asoc, ESTABLISHED) ||
sctp_state(asoc, CLOSED) ||
sctp_state(asoc, SHUTDOWN_RECEIVED)) {
@@ -1112,32 +1117,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
}
-/* Sent the next ASCONF packet currently stored in the association.
- * This happens after the ASCONF_ACK was succeffully processed.
- */
-static void sctp_cmd_send_asconf(struct sctp_association *asoc)
-{
- struct net *net = sock_net(asoc->base.sk);
-
- /* Send the next asconf chunk from the addip chunk
- * queue.
- */
- if (!list_empty(&asoc->addip_chunk_list)) {
- struct list_head *entry = asoc->addip_chunk_list.next;
- struct sctp_chunk *asconf = list_entry(entry,
- struct sctp_chunk, list);
- list_del_init(entry);
-
- /* Hold the chunk until an ASCONF_ACK is received. */
- sctp_chunk_hold(asconf);
- if (sctp_primitive_ASCONF(net, asoc, asconf))
- sctp_chunk_free(asconf);
- else
- asoc->addip_last_asconf = asconf;
- }
-}
-
-
/* These three macros allow us to pull the debugging code out of the
* main flow of sctp_do_sm() to keep attention focused on the real
* functionality there.
@@ -1783,9 +1762,6 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
}
sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
break;
- case SCTP_CMD_SEND_NEXT_ASCONF:
- sctp_cmd_send_asconf(asoc);
- break;
case SCTP_CMD_PURGE_ASCONF_QUEUE:
sctp_asconf_queue_teardown(asoc);
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index c9ae3404b1bb..713a669d2058 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+static enum sctp_disposition sctp_send_next_asconf(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *asconf;
+ struct list_head *entry;
+
+ if (list_empty(&asoc->addip_chunk_list))
+ return SCTP_DISPOSITION_CONSUME;
+
+ entry = asoc->addip_chunk_list.next;
+ asconf = list_entry(entry, struct sctp_chunk, list);
+
+ list_del_init(entry);
+ sctp_chunk_hold(asconf);
+ asoc->addip_last_asconf = asconf;
+
+ return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands);
+}
+
/*
* ADDIP Section 4.3 General rules for address manipulation
* When building TLV parameters for the ASCONF Chunk that will add or
@@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
- asconf_ack)) {
- /* Successfully processed ASCONF_ACK. We can
- * release the next asconf if we have one.
- */
- sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
- SCTP_NULL());
- return SCTP_DISPOSITION_CONSUME;
- }
+ asconf_ack))
+ return sctp_send_next_asconf(net, ep,
+ (struct sctp_association *)asoc,
+ type, commands);
abort = sctp_make_abort(asoc, asconf_ack,
sizeof(struct sctp_errhdr));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 876393cf5ed6..8c00a7ef1bcd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1017,7 +1017,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1025,7 +1025,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
@@ -1036,7 +1036,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
* causes the address buffer to overflow return EINVAL.
*/
if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
addrcnt++;
@@ -1072,7 +1072,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
}
out:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -1347,7 +1347,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1367,7 +1367,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
out_free:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -1884,6 +1884,7 @@ static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
pr_debug("%s: aborting association:%p\n", __func__, asoc);
sctp_primitive_ABORT(net, asoc, chunk);
+ iov_iter_revert(&msg->msg_iter, msg_len);
return 0;
}
@@ -2045,7 +2046,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_transport *transport = NULL;
struct sctp_sndrcvinfo _sinfo, *sinfo;
- struct sctp_association *asoc;
+ struct sctp_association *asoc, *tmp;
struct sctp_cmsgs cmsgs;
union sctp_addr *daddr;
bool new = false;
@@ -2071,7 +2072,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
/* SCTP_SENDALL process */
if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
- list_for_each_entry(asoc, &ep->asocs, asocs) {
+ list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) {
err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
msg_len);
if (err == 0)
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 3892e7630f3a..3b47457862cc 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count)
}
}
+static size_t fa_index(struct flex_array *fa, void *elem, size_t count)
+{
+ size_t index = 0;
+
+ while (count--) {
+ if (elem == flex_array_get(fa, index))
+ break;
+ index++;
+ }
+
+ return index;
+}
+
/* Migrates chunks from stream queues to new stream queues if needed,
* but not across associations. Also, removes those chunks to streams
* higher than the new max.
@@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
}
}
- for (i = outcnt; i < stream->outcnt; i++)
+ for (i = outcnt; i < stream->outcnt; i++) {
kfree(SCTP_SO(stream, i)->ext);
+ SCTP_SO(stream, i)->ext = NULL;
+ }
}
static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
@@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
if (stream->out) {
fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
+ if (stream->out_curr) {
+ size_t index = fa_index(stream->out, stream->out_curr,
+ stream->outcnt);
+
+ BUG_ON(index == stream->outcnt);
+ stream->out_curr = flex_array_get(out, index);
+ }
fa_free(stream->out);
}
@@ -208,8 +230,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
- sched->init(stream);
-
in:
sctp_stream_interleave_init(stream);
if (!incnt)
@@ -585,9 +605,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
struct sctp_strreset_outreq *outreq = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- __u16 i, nums, flags = 0;
__be16 *str_p = NULL;
__u32 request_seq;
+ __u16 i, nums;
request_seq = ntohl(outreq->request_seq);
@@ -615,6 +635,15 @@ struct sctp_chunk *sctp_process_strreset_outreq(
if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
goto out;
+ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
+ str_p = outreq->list_of_streams;
+ for (i = 0; i < nums; i++) {
+ if (ntohs(str_p[i]) >= stream->incnt) {
+ result = SCTP_STRRESET_ERR_WRONG_SSN;
+ goto out;
+ }
+ }
+
if (asoc->strreset_chunk) {
if (!sctp_chunk_lookup_strreset_param(
asoc, outreq->response_seq,
@@ -637,32 +666,19 @@ struct sctp_chunk *sctp_process_strreset_outreq(
sctp_chunk_put(asoc->strreset_chunk);
asoc->strreset_chunk = NULL;
}
-
- flags = SCTP_STREAM_RESET_INCOMING_SSN;
}
- nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
- if (nums) {
- str_p = outreq->list_of_streams;
- for (i = 0; i < nums; i++) {
- if (ntohs(str_p[i]) >= stream->incnt) {
- result = SCTP_STRRESET_ERR_WRONG_SSN;
- goto out;
- }
- }
-
+ if (nums)
for (i = 0; i < nums; i++)
SCTP_SI(stream, ntohs(str_p[i]))->mid = 0;
- } else {
+ else
for (i = 0; i < stream->incnt; i++)
SCTP_SI(stream, i)->mid = 0;
- }
result = SCTP_STRRESET_PERFORMED;
*evp = sctp_ulpevent_make_stream_reset_event(asoc,
- flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p,
- GFP_ATOMIC);
+ SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
out:
sctp_update_strreset_result(asoc, result);
@@ -738,9 +754,6 @@ struct sctp_chunk *sctp_process_strreset_inreq(
result = SCTP_STRRESET_PERFORMED;
- *evp = sctp_ulpevent_make_stream_reset_event(asoc,
- SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
-
out:
sctp_update_strreset_result(asoc, result);
err:
@@ -873,6 +886,14 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
goto out;
+ in = ntohs(addstrm->number_of_streams);
+ incnt = stream->incnt + in;
+ if (!in || incnt > SCTP_MAX_STREAM)
+ goto out;
+
+ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
+ goto out;
+
if (asoc->strreset_chunk) {
if (!sctp_chunk_lookup_strreset_param(
asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
@@ -896,14 +917,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
}
}
- in = ntohs(addstrm->number_of_streams);
- incnt = stream->incnt + in;
- if (!in || incnt > SCTP_MAX_STREAM)
- goto out;
-
- if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
- goto out;
-
stream->incnt = incnt;
result = SCTP_STRRESET_PERFORMED;
@@ -973,9 +986,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
result = SCTP_STRRESET_PERFORMED;
- *evp = sctp_ulpevent_make_stream_change_event(asoc,
- 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
-
out:
sctp_update_strreset_result(asoc, result);
err:
@@ -1036,10 +1046,10 @@ struct sctp_chunk *sctp_process_strreset_resp(
sout->mid_uo = 0;
}
}
-
- flags = SCTP_STREAM_RESET_OUTGOING_SSN;
}
+ flags |= SCTP_STREAM_RESET_OUTGOING_SSN;
+
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -1058,6 +1068,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
sizeof(__u16);
+ flags |= SCTP_STREAM_RESET_INCOMING_SSN;
+
*evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
nums, str_p, GFP_ATOMIC);
} else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119f1c70..e6e506b2db99 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -144,9 +144,18 @@ static int smc_release(struct socket *sock)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
}
+
+ sk->sk_prot->unhash(sk);
+
if (smc->clcsock) {
+ if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
+ /* wake up clcsock accept */
+ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
+ }
+ mutex_lock(&smc->clcsock_release_lock);
sock_release(smc->clcsock);
smc->clcsock = NULL;
+ mutex_unlock(&smc->clcsock_release_lock);
}
if (smc->use_fallback) {
if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
@@ -162,7 +171,6 @@ static int smc_release(struct socket *sock)
smc_conn_free(&smc->conn);
release_sock(sk);
- sk->sk_prot->unhash(sk);
sock_put(sk); /* final sock_put */
out:
return rc;
@@ -203,6 +211,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
spin_lock_init(&smc->conn.send_lock);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
+ mutex_init(&smc->clcsock_release_lock);
return sk;
}
@@ -818,7 +827,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
struct socket *new_clcsock = NULL;
struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
- int rc;
+ int rc = -EINVAL;
release_sock(lsk);
new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
@@ -831,7 +840,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
}
*new_smc = smc_sk(new_sk);
- rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
+ mutex_lock(&lsmc->clcsock_release_lock);
+ if (lsmc->clcsock)
+ rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
+ mutex_unlock(&lsmc->clcsock_release_lock);
lock_sock(lsk);
if (rc < 0)
lsk->sk_err = -rc;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 08786ace6010..adbdf195eb08 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -113,9 +113,9 @@ struct smc_host_cdc_msg { /* Connection Data Control message */
} __aligned(8);
enum smc_urg_state {
- SMC_URG_VALID, /* data present */
- SMC_URG_NOTYET, /* data pending */
- SMC_URG_READ /* data was already read */
+ SMC_URG_VALID = 1, /* data present */
+ SMC_URG_NOTYET = 2, /* data pending */
+ SMC_URG_READ = 3, /* data was already read */
};
struct smc_connection {
@@ -219,6 +219,10 @@ struct smc_sock { /* smc sock container */
* started, waiting for unsent
* data to be sent
*/
+ struct mutex clcsock_release_lock;
+ /* protects clcsock of a listen
+ * socket
+ * */
};
static inline struct smc_sock *smc_sk(const struct sock *sk)
diff --git a/net/socket.c b/net/socket.c
index 390a8ecef4bf..18d27b8c2511 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -577,6 +577,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
if (inode)
inode_lock(inode);
sock->ops->release(sock);
+ sock->sk = NULL;
if (inode)
inode_unlock(inode);
sock->ops = NULL;
@@ -941,8 +942,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
EXPORT_SYMBOL(dlci_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
- unsigned int cmd, unsigned long arg,
- unsigned int ifreq_size)
+ unsigned int cmd, unsigned long arg)
{
int err;
void __user *argp = (void __user *)arg;
@@ -968,11 +968,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
} else {
struct ifreq ifr;
bool need_copyout;
- if (copy_from_user(&ifr, argp, ifreq_size))
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
return -EFAULT;
err = dev_ioctl(net, cmd, &ifr, &need_copyout);
if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, ifreq_size))
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
return -EFAULT;
}
return err;
@@ -1071,8 +1071,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
err = open_related_ns(&net->ns, get_net_ns);
break;
default:
- err = sock_do_ioctl(net, sock, cmd, arg,
- sizeof(struct ifreq));
+ err = sock_do_ioctl(net, sock, cmd, arg);
break;
}
return err;
@@ -2752,8 +2751,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
int err;
set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
- sizeof(struct compat_ifreq));
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
set_fs(old_fs);
if (!err)
err = compat_put_timeval(&ktv, up);
@@ -2769,8 +2767,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
int err;
set_fs(KERNEL_DS);
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
- sizeof(struct compat_ifreq));
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
set_fs(old_fs);
if (!err)
err = compat_put_timespec(&kts, up);
@@ -2966,6 +2963,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
return dev_ioctl(net, cmd, &ifreq, NULL);
}
+static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
+ unsigned int cmd,
+ struct compat_ifreq __user *uifr32)
+{
+ struct ifreq __user *uifr;
+ int err;
+
+ /* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+
+ uifr = compat_alloc_user_space(sizeof(*uifr));
+ if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
+ return -EFAULT;
+
+ err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
+
+ if (!err) {
+ switch (cmd) {
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFMEM:
+ case SIOCGIFHWADDR:
+ case SIOCGIFINDEX:
+ case SIOCGIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCGIFNETMASK:
+ case SIOCGIFPFLAGS:
+ case SIOCGIFTXQLEN:
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCGIFNAME:
+ if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
+ err = -EFAULT;
+ break;
+ }
+ }
+ return err;
+}
+
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
struct compat_ifreq __user *uifr32)
{
@@ -3081,8 +3126,7 @@ static int routing_ioctl(struct net *net, struct socket *sock,
}
set_fs(KERNEL_DS);
- ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
- sizeof(struct compat_ifreq));
+ ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
set_fs(old_fs);
out:
@@ -3182,21 +3226,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFTXQLEN:
case SIOCBRADDIF:
case SIOCBRDELIF:
+ case SIOCGIFNAME:
case SIOCSIFNAME:
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCSARP:
- case SIOCGARP:
- case SIOCDARP:
- case SIOCATMARK:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
- case SIOCGIFNAME:
- return sock_do_ioctl(net, sock, cmd, arg,
- sizeof(struct compat_ifreq));
+ return compat_ifreq_ioctl(net, sock, cmd, argp);
+
+ case SIOCSARP:
+ case SIOCGARP:
+ case SIOCDARP:
+ case SIOCATMARK:
+ return sock_do_ioctl(net, sock, cmd, arg);
}
return -ENOIOCTLCMD;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index da1a676860ca..0f4e42792878 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list.
*/
- if (skb_has_frag_list(head)) {
- err = skb_unclone(head, GFP_ATOMIC);
- if (err) {
- STRP_STATS_INCR(strp->stats.mem_fail);
- desc->error = err;
- return 0;
- }
+ err = skb_unclone(head, GFP_ATOMIC);
+ if (err) {
+ STRP_STATS_INCR(strp->stats.mem_fail);
+ desc->error = err;
+ return 0;
}
if (unlikely(skb_shinfo(head)->frag_list)) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c8b9082f4a9d..2d2ed6772fe4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
struct crypto_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -53,6 +53,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
if (IS_ERR(cipher))
return PTR_ERR(cipher);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
@@ -69,6 +73,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
crypto_free_skcipher(cipher);
+ kfree(plain);
return code;
}
s32
@@ -78,12 +83,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
- unsigned char plain[8];
+ unsigned char *plain;
+ s32 code;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_make_rc4_seq_num(kctx, direction, seqnum,
cksum, buf);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
+
plain[0] = (unsigned char) (seqnum & 0xff);
plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -94,7 +104,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
plain[6] = direction;
plain[7] = direction;
- return krb5_encrypt(key, cksum, plain, buf, 8);
+ code = krb5_encrypt(key, cksum, plain, buf, 8);
+ kfree(plain);
+ return code;
}
static s32
@@ -102,7 +114,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
struct crypto_skcipher *cipher;
- unsigned char plain[8];
+ unsigned char *plain;
s32 code;
dprintk("RPC: %s:\n", __func__);
@@ -115,20 +127,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
if (code)
goto out;
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain) {
+ code = -ENOMEM;
+ goto out;
+ }
+
code = krb5_decrypt(cipher, cksum, buf, plain, 8);
if (code)
- goto out;
+ goto out_plain;
if ((plain[4] != plain[5]) || (plain[4] != plain[6])
|| (plain[4] != plain[7])) {
code = (s32)KG_BAD_SEQ;
- goto out;
+ goto out_plain;
}
*direction = plain[4];
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
+out_plain:
+ kfree(plain);
out:
crypto_free_skcipher(cipher);
return code;
@@ -141,26 +161,33 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
int *direction, u32 *seqnum)
{
s32 code;
- unsigned char plain[8];
struct crypto_skcipher *key = kctx->seq;
+ unsigned char *plain;
dprintk("RPC: krb5_get_seq_num:\n");
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_get_rc4_seq_num(kctx, cksum, buf,
direction, seqnum);
+ plain = kmalloc(8, GFP_NOFS);
+ if (!plain)
+ return -ENOMEM;
if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- return code;
+ goto out;
if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7]))
- return (s32)KG_BAD_SEQ;
+ (plain[4] != plain[7])) {
+ code = (s32)KG_BAD_SEQ;
+ goto out;
+ }
*direction = plain[4];
*seqnum = ((plain[0]) |
(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
- return 0;
+out:
+ kfree(plain);
+ return code;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 860f2a1bbb67..1a65f88d021a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1122,7 +1122,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0];
struct rsi *rsip, rsikey;
int ret;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
ret = gss_read_verf(gc, argv, authp,
@@ -1233,7 +1233,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
uint64_t handle;
int status;
int ret;
- struct net *net = rqstp->rq_xprt->xpt_net;
+ struct net *net = SVC_NET(rqstp);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
memset(&ud, 0, sizeof(ud));
@@ -1424,7 +1424,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
__be32 *rpcstart;
__be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",
argv->iov_len);
@@ -1714,7 +1714,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
struct rpc_gss_wire_cred *gc = &gsd->clcred;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 109fbe591e7b..214440c5b14e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,12 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
+static inline int cache_is_valid(struct cache_head *h);
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail);
+static void cache_fresh_unlocked(struct cache_head *head,
+ struct cache_detail *detail);
+
struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
struct cache_head *key, int hash)
{
@@ -95,6 +101,9 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
if (cache_is_expired(detail, tmp)) {
hlist_del_init(&tmp->cache_list);
detail->entries --;
+ if (cache_is_valid(tmp) == -EAGAIN)
+ set_bit(CACHE_NEGATIVE, &tmp->flags);
+ cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
}
@@ -110,8 +119,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
cache_get(new);
write_unlock(&detail->hash_lock);
- if (freeme)
+ if (freeme) {
+ cache_fresh_unlocked(freeme, detail);
cache_put(freeme, detail);
+ }
return new;
}
EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ea2f5fadd96..1fc812ba9871 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1992,13 +1992,15 @@ call_transmit(struct rpc_task *task)
static void
call_transmit_status(struct rpc_task *task)
{
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
task->tk_action = call_status;
/*
* Common case: success. Force the compiler to put this
- * test first.
+ * test first. Or, if any error and xprt_close_wait,
+ * release the xprt lock so the socket can close.
*/
- if (task->tk_status == 0) {
+ if (task->tk_status == 0 || xprt_close_wait(xprt)) {
xprt_end_transmit(task);
rpc_task_force_reencode(task);
return;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c7872bc13860..08b5fa4a2852 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -771,6 +771,12 @@ void rpcb_getport_async(struct rpc_task *task)
case RPCBVERS_3:
map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
+ if (!map->r_addr) {
+ status = -ENOMEM;
+ dprintk("RPC: %5u %s: no memory available\n",
+ task->tk_pid, __func__);
+ goto bailout_free_args;
+ }
map->r_owner = "";
break;
case RPCBVERS_2:
@@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task)
rpc_put_task(child);
return;
+bailout_free_args:
+ kfree(map);
bailout_release_client:
rpc_release_client(rpcb_clnt);
bailout_nofree:
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d13e05f1a990..d65f8d35de87 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1144,6 +1144,8 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
+extern void svc_tcp_prep_reply_hdr(struct svc_rqst *);
+
/*
* Common routine for processing the RPC request.
*/
@@ -1172,7 +1174,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
clear_bit(RQ_DROPME, &rqstp->rq_flags);
/* Setup reply header */
- rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
+ if (rqstp->rq_prot == IPPROTO_TCP)
+ svc_tcp_prep_reply_hdr(rqstp);
svc_putu32(resv, rqstp->rq_xid);
@@ -1244,7 +1247,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* for lower versions. RPC_PROG_MISMATCH seems to be the closest
* fit.
*/
- if (versp->vs_need_cong_ctrl &&
+ if (versp->vs_need_cong_ctrl && rqstp->rq_xprt &&
!test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
goto err_bad_vers;
@@ -1336,7 +1339,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
return 0;
close:
- if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+ if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
@@ -1459,10 +1462,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
- rqstp->rq_xprt = serv->sv_bc_xprt;
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
rqstp->rq_server = serv;
+ rqstp->rq_bc_net = req->rq_xprt->xprt_net;
rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b7b9cf3b375a..48c0a0b90946 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -469,10 +469,11 @@ out:
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
space += rqstp->rq_res.head[0].iov_len;
- if (space < rqstp->rq_reserved) {
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145e639c..8566531c2f10 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -381,12 +381,16 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp,
/*
* Set socket snd and rcv buffer lengths
*/
-static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
- unsigned int rcv)
+static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
{
+ unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
+ struct socket *sock = svsk->sk_sock;
+
+ nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
+
lock_sock(sock->sk);
- sock->sk->sk_sndbuf = snd * 2;
- sock->sk->sk_rcvbuf = rcv * 2;
+ sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
+ sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
}
@@ -548,9 +552,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* provides an upper bound on the number of threads
* which will access the socket.
*/
- svc_sock_setbufsize(svsk->sk_sock,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg);
+ svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL;
@@ -574,7 +576,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- svsk->sk_sk->sk_stamp = skb->tstamp;
+ sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
len = skb->len;
@@ -718,9 +720,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
* receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary
*/
- svc_sock_setbufsize(svsk->sk_sock,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
+ svc_sock_setbufsize(svsk, 3);
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1198,7 +1198,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
/*
* Setup response header. TCP has a 4B record length field.
*/
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
+void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
struct kvec *resv = &rqstp->rq_res.head[0];
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a8db2e3f8904..d066aae3cb6d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -781,8 +781,15 @@ void xprt_connect(struct rpc_task *task)
return;
if (xprt_test_and_set_connecting(xprt))
return;
- xprt->stat.connect_start = jiffies;
- xprt->ops->connect(xprt, task);
+ /* Race breaker */
+ if (!xprt_connected(xprt)) {
+ xprt->stat.connect_start = jiffies;
+ xprt->ops->connect(xprt, task);
+ } else {
+ xprt_clear_connecting(xprt);
+ task->tk_status = 0;
+ rpc_wake_up_queued_task(&xprt->pending, task);
+ }
}
xprt_release_write(xprt, task);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8602a5f1b515..e8ad7ddf347a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -563,6 +563,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
DMA_TO_DEVICE);
}
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+ struct xdr_buf *xdr,
+ __be32 *wr_lst)
+{
+ int elements;
+
+ /* xdr->head */
+ elements = 1;
+
+ /* xdr->pages */
+ if (!wr_lst) {
+ unsigned int remaining;
+ unsigned long pageoff;
+
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ ++elements;
+ remaining -= min_t(u32, PAGE_SIZE - pageoff,
+ remaining);
+ pageoff = 0;
+ }
+ }
+
+ /* xdr->tail */
+ if (xdr->tail[0].iov_len)
+ ++elements;
+
+ /* assume 1 SGE is needed for the transport header */
+ return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
+{
+ unsigned char *dst, *tailbase;
+ unsigned int taillen;
+
+ dst = ctxt->sc_xprt_buf;
+ dst += ctxt->sc_sges[0].length;
+
+ memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+ dst += xdr->head[0].iov_len;
+
+ tailbase = xdr->tail[0].iov_base;
+ taillen = xdr->tail[0].iov_len;
+ if (wr_lst) {
+ u32 xdrpad;
+
+ xdrpad = xdr_padsize(xdr->page_len);
+ if (taillen && xdrpad) {
+ tailbase += xdrpad;
+ taillen -= xdrpad;
+ }
+ } else {
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+ memcpy(dst, page_address(*ppages), len);
+ remaining -= len;
+ dst += len;
+ pageoff = 0;
+ }
+ }
+
+ if (taillen)
+ memcpy(dst, tailbase, taillen);
+
+ ctxt->sc_sges[0].length += xdr->len;
+ ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr,
+ ctxt->sc_sges[0].length,
+ DMA_TO_DEVICE);
+
+ return 0;
+}
+
/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
* @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR
@@ -585,8 +678,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad;
int ret;
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+ return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt,
xdr->head[0].iov_base,
xdr->head[0].iov_len);
@@ -617,8 +712,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining);
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
page_off, len);
if (ret < 0)
@@ -632,8 +726,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len;
tail:
if (len) {
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2848cafd4a17..ce5c610b49c7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -475,13 +475,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
- /* transport hdr, head iovec, one page list entry, tail iovec */
- if (newxprt->sc_max_send_sges < 4) {
- pr_err("svcrdma: too few Send SGEs available (%d)\n",
- newxprt->sc_max_send_sges);
- goto errout;
- }
+ /* Transport header, head iovec, tail iovec */
+ newxprt->sc_max_send_sges = 3;
+ /* Add one SGE per page list entry */
+ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 956a5ea47b58..5ddbf227e7c6 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -546,7 +546,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 1, IB_POLL_WORKQUEUE);
+ ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -872,7 +873,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
for (i = 0; i <= buf->rb_sc_last; i++) {
sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
if (!sc)
- goto out_destroy;
+ return -ENOMEM;
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
@@ -880,10 +881,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
buf->rb_flags = 0;
return 0;
-
-out_destroy:
- rpcrdma_sendctxs_destroy(buf);
- return -ENOMEM;
}
/* The sendctx queue is not guaranteed to have a size that is a
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6b7539c0466e..7d8cce1dfcad 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2244,8 +2244,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}
@@ -2480,8 +2480,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
}
status = -EAGAIN;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 645c16052052..2649a0a0d45e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -317,7 +317,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
bearer_disable(net, b);
- kfree(b);
errstr = "failed to create discoverer";
goto rejected;
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5b38f5164281..3ecca3b88bf8 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -75,9 +75,6 @@ static int __net_init tipc_init_net(struct net *net)
goto out_nametbl;
INIT_LIST_HEAD(&tn->dist_queue);
- err = tipc_topsrv_start(net);
- if (err)
- goto out_subscr;
err = tipc_bcast_init(net);
if (err)
@@ -86,8 +83,6 @@ static int __net_init tipc_init_net(struct net *net)
return 0;
out_bclink:
- tipc_bcast_stop(net);
-out_subscr:
tipc_nametbl_stop(net);
out_nametbl:
tipc_sk_rht_destroy(net);
@@ -97,7 +92,6 @@ out_sk_rht:
static void __net_exit tipc_exit_net(struct net *net)
{
- tipc_topsrv_stop(net);
tipc_net_stop(net);
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
@@ -111,6 +105,11 @@ static struct pernet_operations tipc_net_ops = {
.size = sizeof(struct tipc_net),
};
+static struct pernet_operations tipc_topsrv_net_ops = {
+ .init = tipc_topsrv_init_net,
+ .exit = tipc_topsrv_exit_net,
+};
+
static int __init tipc_init(void)
{
int err;
@@ -129,10 +128,6 @@ static int __init tipc_init(void)
if (err)
goto out_netlink_compat;
- err = tipc_socket_init();
- if (err)
- goto out_socket;
-
err = tipc_register_sysctl();
if (err)
goto out_sysctl;
@@ -141,6 +136,14 @@ static int __init tipc_init(void)
if (err)
goto out_pernet;
+ err = tipc_socket_init();
+ if (err)
+ goto out_socket;
+
+ err = register_pernet_subsys(&tipc_topsrv_net_ops);
+ if (err)
+ goto out_pernet_topsrv;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -148,12 +151,14 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
out_bearer:
+ unregister_pernet_subsys(&tipc_topsrv_net_ops);
+out_pernet_topsrv:
+ tipc_socket_stop();
+out_socket:
unregister_pernet_subsys(&tipc_net_ops);
out_pernet:
tipc_unregister_sysctl();
out_sysctl:
- tipc_socket_stop();
-out_socket:
tipc_netlink_compat_stop();
out_netlink_compat:
tipc_netlink_stop();
@@ -165,10 +170,11 @@ out_netlink:
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
+ unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ tipc_socket_stop();
unregister_pernet_subsys(&tipc_net_ops);
tipc_netlink_stop();
tipc_netlink_compat_stop();
- tipc_socket_stop();
tipc_unregister_sysctl();
pr_info("Deactivated\n");
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66d5b2c5987a..d72985ca1d55 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -908,7 +908,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
for (; i < TIPC_NAMETBL_SIZE; i++) {
head = &tn->nametbl->services[i];
- if (*last_type) {
+ if (*last_type ||
+ (!i && *last_key && (*last_lower == *last_key))) {
service = tipc_service_find(net, *last_type);
if (!service)
return -EPIPE;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f076edb74338..7ce1e86b024f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr)
void tipc_net_stop(struct net *net)
{
- u32 self = tipc_own_addr(net);
-
- if (!self)
+ if (!tipc_own_id(net))
return;
- tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 6376467e78f8..588d5aa14c41 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb)
return limit;
}
+static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv)
+{
+ return TLV_GET_LEN(tlv) - TLV_SPACE(0);
+}
+
static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
{
struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb);
@@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
return buf;
}
+static inline bool string_is_valid(char *s, int len)
+{
+ return memchr(s, '\0', len) ? true : false;
+}
+
static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
@@ -257,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);
- if (cmd->header)
- (*cmd->header)(msg);
+ if (cmd->header) {
+ err = (*cmd->header)(msg);
+ if (err) {
+ kfree_skb(msg->rep);
+ msg->rep = NULL;
+ return err;
+ }
+ }
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
@@ -379,6 +395,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
struct nlattr *prop;
struct nlattr *bearer;
struct tipc_bearer_config *b;
+ int len;
b = (struct tipc_bearer_config *)TLV_DATA(msg->req);
@@ -386,6 +403,15 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
+ len = TLV_GET_DATA_LEN(msg->req);
+ len -= offsetof(struct tipc_bearer_config, name);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
+ if (!string_is_valid(b->name, len))
+ return -EINVAL;
+
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name))
return -EMSGSIZE;
@@ -411,6 +437,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
{
char *name;
struct nlattr *bearer;
+ int len;
name = (char *)TLV_DATA(msg->req);
@@ -418,6 +445,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
+ len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+ if (!string_is_valid(name, len))
+ return -EINVAL;
+
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name))
return -EMSGSIZE;
@@ -478,6 +509,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
int err;
+ int len;
if (!attrs[TIPC_NLA_LINK])
return -EINVAL;
@@ -504,6 +536,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return err;
name = (char *)TLV_DATA(msg->req);
+
+ len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ if (!string_is_valid(name, len))
+ return -EINVAL;
+
if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
return 0;
@@ -644,6 +681,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb,
struct nlattr *prop;
struct nlattr *media;
struct tipc_link_config *lc;
+ int len;
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
@@ -651,6 +689,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb,
if (!media)
return -EMSGSIZE;
+ len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME);
+ if (!string_is_valid(lc->name, len))
+ return -EINVAL;
+
if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
return -EMSGSIZE;
@@ -671,6 +713,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
struct nlattr *prop;
struct nlattr *bearer;
struct tipc_link_config *lc;
+ int len;
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
@@ -678,6 +721,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
if (!bearer)
return -EMSGSIZE;
+ len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME);
+ if (!string_is_valid(lc->name, len))
+ return -EINVAL;
+
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
return -EMSGSIZE;
@@ -726,9 +773,19 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
struct tipc_link_config *lc;
struct tipc_bearer *bearer;
struct tipc_media *media;
+ int len;
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+ len = TLV_GET_DATA_LEN(msg->req);
+ len -= offsetof(struct tipc_link_config, name);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_LINK_NAME);
+ if (!string_is_valid(lc->name, len))
+ return -EINVAL;
+
media = tipc_media_find(lc->name);
if (media) {
cmd->doit = &__tipc_nl_media_set;
@@ -750,6 +807,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
{
char *name;
struct nlattr *link;
+ int len;
name = (char *)TLV_DATA(msg->req);
@@ -757,6 +815,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
if (!link)
return -EMSGSIZE;
+ len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ if (!string_is_valid(name, len))
+ return -EINVAL;
+
if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name))
return -EMSGSIZE;
@@ -778,6 +840,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
};
ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
+ if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+ return -EINVAL;
depth = ntohl(ntq->depth);
@@ -1201,7 +1265,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
}
len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
- if (len && !TLV_OK(msg.req, len)) {
+ if (!len || !TLV_OK(msg.req, len)) {
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
err = -EOPNOTSUPP;
goto send;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 488019766433..32556f480a60 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -624,6 +624,12 @@ static void tipc_node_timeout(struct timer_list *t)
__skb_queue_head_init(&xmitq);
+ /* Initial node interval to value larger (10 seconds), then it will be
+ * recalculated with link lowest tolerance
+ */
+ tipc_node_read_lock(n);
+ n->keepalive_intv = 10000;
+ tipc_node_read_unlock(n);
for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
tipc_node_read_lock(n);
le = &n->links[bearer_id];
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 366ce0bf2658..6c91f1217dcf 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -377,11 +377,13 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
#define tipc_wait_for_cond(sock_, timeo_, condition_) \
({ \
+ DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
struct sock *sk_; \
int rc_; \
\
while ((rc_ = !(condition_))) { \
- DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
+ /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \
+ smp_rmb(); \
sk_ = (sock_)->sk; \
rc_ = tipc_sk_sock_err((sock_), timeo_); \
if (rc_) \
@@ -724,11 +726,11 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
- case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= EPOLLOUT;
/* fall thru' */
case TIPC_LISTEN:
+ case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
revents |= EPOLLIN | EPOLLRDNORM;
break;
@@ -878,7 +880,6 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
struct net *net = sock_net(sk);
struct tipc_member *mb = NULL;
u32 node, port;
@@ -892,7 +893,9 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
/* Block or return if destination link or member is congested */
rc = tipc_wait_for_cond(sock, &timeout,
!tipc_dest_find(&tsk->cong_links, node, 0) &&
- !tipc_group_cong(grp, node, port, blks, &mb));
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port, blks,
+ &mb));
if (unlikely(rc))
return rc;
@@ -922,7 +925,6 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct tipc_sock *tsk = tipc_sk(sk);
struct list_head *cong_links = &tsk->cong_links;
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
- struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_member *first = NULL;
struct tipc_member *mbr = NULL;
@@ -939,9 +941,10 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
type = msg_nametype(hdr);
inst = dest->addr.name.name.instance;
scope = msg_lookup_scope(hdr);
- exclude = tipc_group_exclude(grp);
while (++lookups < 4) {
+ exclude = tipc_group_exclude(tsk->group);
+
first = NULL;
/* Look for a non-congested destination member, if any */
@@ -950,7 +953,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
&dstcnt, exclude, false))
return -EHOSTUNREACH;
tipc_dest_pop(&dsts, &node, &port);
- cong = tipc_group_cong(grp, node, port, blks, &mbr);
+ cong = tipc_group_cong(tsk->group, node, port, blks,
+ &mbr);
if (!cong)
break;
if (mbr == first)
@@ -969,7 +973,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/* Block or return if destination link or member is congested */
rc = tipc_wait_for_cond(sock, &timeout,
!tipc_dest_find(cong_links, node, 0) &&
- !tipc_group_cong(grp, node, port,
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port,
blks, &mbr));
if (unlikely(rc))
return rc;
@@ -1004,8 +1009,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
- struct tipc_nlist *dsts = tipc_group_dests(grp);
+ struct tipc_nlist *dsts;
struct tipc_mc_method *method = &tsk->mc_method;
bool ack = method->mandatory && method->rcast;
int blks = tsk_blocks(MCAST_H_SIZE + dlen);
@@ -1014,15 +1018,17 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
struct sk_buff_head pkts;
int rc = -EHOSTUNREACH;
- if (!dsts->local && !dsts->remote)
- return -EHOSTUNREACH;
-
/* Block or return if any destination link or member is congested */
- rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
- !tipc_group_bc_cong(grp, blks));
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tsk->cong_link_cnt && tsk->group &&
+ !tipc_group_bc_cong(tsk->group, blks));
if (unlikely(rc))
return rc;
+ dsts = tipc_group_dests(tsk->group);
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
/* Complete message header */
if (dest) {
msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
@@ -1034,7 +1040,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
msg_set_hdr_sz(hdr, GROUP_H_SIZE);
msg_set_destport(hdr, 0);
msg_set_destnode(hdr, 0);
- msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group));
/* Avoid getting stuck with repeated forced replicasts */
msg_set_grp_bc_ack_req(hdr, ack);
@@ -1314,7 +1320,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(!dest)) {
dest = &tsk->peer;
- if (!syn || dest->family != AF_TIPC)
+ if (!syn && dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
@@ -1957,6 +1963,8 @@ static void tipc_sk_proto_rcv(struct sock *sk,
return;
case SOCK_WAKEUP:
tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ /* coupled with smp_rmb() in tipc_wait_for_cond() */
+ smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
break;
@@ -2031,7 +2039,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
return true;
/* If empty 'ACK-' message, wake up sleeping connect() */
- sk->sk_data_ready(sk);
+ sk->sk_state_change(sk);
/* 'ACK-' message is neither accepted nor rejected: */
msg_set_dest_droppable(hdr, 1);
@@ -2302,6 +2310,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return 0;
}
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
+{
+ if (addr->family != AF_TIPC)
+ return false;
+ if (addr->addrtype == TIPC_SERVICE_RANGE)
+ return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+ return (addr->addrtype == TIPC_SERVICE_ADDR ||
+ addr->addrtype == TIPC_SOCKET_ADDR);
+}
+
/**
* tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
@@ -2337,18 +2355,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if (!tipc_sk_type_connectionless(sk))
res = -EINVAL;
goto exit;
- } else if (dst->family != AF_TIPC) {
- res = -EINVAL;
}
- if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ if (!tipc_sockaddr_is_sane(dst)) {
res = -EINVAL;
- if (res)
goto exit;
-
+ }
/* DGRAM/RDM connect(), just save the destaddr */
if (tipc_sk_type_connectionless(sk)) {
memcpy(&tsk->peer, dest, destlen);
goto exit;
+ } else if (dst->addrtype == TIPC_SERVICE_RANGE) {
+ res = -EINVAL;
+ goto exit;
}
previous = sk->sk_state;
@@ -2683,11 +2701,15 @@ void tipc_sk_reinit(struct net *net)
rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
- spin_lock_bh(&tsk->sk.sk_lock.slock);
+ sock_hold(&tsk->sk);
+ rhashtable_walk_stop(&iter);
+ lock_sock(&tsk->sk);
msg = &tsk->phdr;
msg_set_prevnode(msg, tipc_own_addr(net));
msg_set_orignode(msg, tipc_own_addr(net));
- spin_unlock_bh(&tsk->sk.sk_lock.slock);
+ release_sock(&tsk->sk);
+ rhashtable_walk_start(&iter);
+ sock_put(&tsk->sk);
}
rhashtable_walk_stop(&iter);
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index d793b4343885..aa015c233898 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -77,8 +77,9 @@ void tipc_sub_report_overlap(struct tipc_subscription *sub,
u32 found_lower, u32 found_upper,
u32 event, u32 port, u32 node,
u32 scope, int must);
-int tipc_topsrv_start(struct net *net);
-void tipc_topsrv_stop(struct net *net);
+
+int __net_init tipc_topsrv_init_net(struct net *net);
+void __net_exit tipc_topsrv_exit_net(struct net *net);
void tipc_sub_put(struct tipc_subscription *subscription);
void tipc_sub_get(struct tipc_subscription *subscription);
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index b84c0059214f..35558656fe02 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -371,6 +371,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
struct tipc_subscription *sub;
if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
@@ -404,7 +405,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
if (ret == -EWOULDBLOCK)
return -EWOULDBLOCK;
- if (ret > 0) {
+ if (ret == sizeof(s)) {
read_lock_bh(&sk->sk_callback_lock);
ret = tipc_conn_rcv_sub(srv, con, &s);
read_unlock_bh(&sk->sk_callback_lock);
@@ -642,7 +643,7 @@ static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
destroy_workqueue(s->send_wq);
}
-int tipc_topsrv_start(struct net *net)
+static int tipc_topsrv_start(struct net *net)
{
struct tipc_net *tn = tipc_net(net);
const char name[] = "topology_server";
@@ -676,7 +677,7 @@ int tipc_topsrv_start(struct net *net)
return ret;
}
-void tipc_topsrv_stop(struct net *net)
+static void tipc_topsrv_stop(struct net *net)
{
struct tipc_topsrv *srv = tipc_topsrv(net);
struct socket *lsock = srv->listener;
@@ -701,3 +702,13 @@ void tipc_topsrv_stop(struct net *net)
idr_destroy(&srv->conn_idr);
kfree(srv);
}
+
+int __net_init tipc_topsrv_init_net(struct net *net)
+{
+ return tipc_topsrv_start(net);
+}
+
+void __net_exit tipc_topsrv_exit_net(struct net *net)
+{
+ tipc_topsrv_stop(net);
+}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 9783101bc4a9..da2d311476ab 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -245,10 +245,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
}
err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
- if (err) {
- kfree_skb(_skb);
+ if (err)
goto out;
- }
}
err = 0;
out:
@@ -680,6 +678,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (err)
goto err;
+ if (remote.proto != local.proto) {
+ err = -EINVAL;
+ goto err;
+ }
+
/* Autoconfigure own node identity if needed */
if (!tipc_own_id(net)) {
memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 961b07d4d41c..ead29c2aefa7 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock);
static void tls_device_free_ctx(struct tls_context *ctx)
{
- if (ctx->tx_conf == TLS_HW)
+ if (ctx->tx_conf == TLS_HW) {
kfree(tls_offload_ctx_tx(ctx));
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ }
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
@@ -542,10 +545,22 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
}
+static void tls_device_resync_rx(struct tls_context *tls_ctx,
+ struct sock *sk, u32 seq, u64 rcd_sn)
+{
+ struct net_device *netdev;
+
+ if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
+ return;
+ netdev = READ_ONCE(tls_ctx->netdev);
+ if (netdev)
+ netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, rcd_sn);
+ clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+}
+
void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct net_device *netdev = tls_ctx->netdev;
struct tls_offload_context_rx *rx_ctx;
u32 is_req_pending;
s64 resync_req;
@@ -560,16 +575,16 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
is_req_pending = resync_req;
if (unlikely(is_req_pending) && req_seq == seq &&
- atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
- netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk,
- seq + TLS_HEADER_SIZE - 1,
- rcd_sn);
+ atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) {
+ seq += TLS_HEADER_SIZE - 1;
+ tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn);
+ }
}
static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
{
struct strp_msg *rxm = strp_msg(skb);
- int err = 0, offset = rxm->offset, copy, nsg;
+ int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
struct sk_buff *skb_iter, *unused;
struct scatterlist sg[1];
char *orig_buf, *buf;
@@ -600,25 +615,42 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
else
err = 0;
- copy = min_t(int, skb_pagelen(skb) - offset,
- rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+
+ if (skb_pagelen(skb) > offset) {
+ copy = min_t(int, skb_pagelen(skb) - offset, data_len);
- if (skb->decrypted)
- skb_store_bits(skb, offset, buf, copy);
+ if (skb->decrypted)
+ skb_store_bits(skb, offset, buf, copy);
- offset += copy;
- buf += copy;
+ offset += copy;
+ buf += copy;
+ }
+ pos = skb_pagelen(skb);
skb_walk_frags(skb, skb_iter) {
- copy = min_t(int, skb_iter->len,
- rxm->full_len - offset + rxm->offset -
- TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ int frag_pos;
+
+ /* Practically all frags must belong to msg if reencrypt
+ * is needed with current strparser and coalescing logic,
+ * but strparser may "get optimized", so let's be safe.
+ */
+ if (pos + skb_iter->len <= offset)
+ goto done_with_frag;
+ if (pos >= data_len + rxm->offset)
+ break;
+
+ frag_pos = offset - pos;
+ copy = min_t(int, skb_iter->len - frag_pos,
+ data_len + rxm->offset - offset);
if (skb_iter->decrypted)
- skb_store_bits(skb_iter, offset, buf, copy);
+ skb_store_bits(skb_iter, frag_pos, buf, copy);
offset += copy;
buf += copy;
+done_with_frag:
+ pos += skb_iter->len;
}
free_buf:
@@ -874,7 +906,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
goto release_netdev;
free_sw_resources:
+ up_read(&device_offload_lock);
tls_sw_free_resources_rx(sk);
+ down_read(&device_offload_lock);
release_ctx:
ctx->priv_ctx_rx = NULL;
release_netdev:
@@ -894,12 +928,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
if (!netdev)
goto out;
- if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
- pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n",
- __func__);
- goto out;
- }
-
netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx,
TLS_OFFLOAD_CTX_DIR_RX);
@@ -909,8 +937,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
}
out:
up_read(&device_offload_lock);
- kfree(tls_ctx->rx.rec_seq);
- kfree(tls_ctx->rx.iv);
tls_sw_release_resources_rx(sk);
}
@@ -940,7 +966,10 @@ static int tls_device_down(struct net_device *netdev)
if (ctx->rx_conf == TLS_HW)
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_RX);
- ctx->netdev = NULL;
+ WRITE_ONCE(ctx->netdev, NULL);
+ smp_mb__before_atomic(); /* pairs with test_and_set_bit() */
+ while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags))
+ usleep_range(10, 200);
dev_put(netdev);
list_del_init(&ctx->list);
@@ -960,7 +989,8 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
+ if (!dev->tlsdev_ops &&
+ !(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
return NOTIFY_DONE;
switch (event) {
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 450a6dbc5a88..426dd97725e4 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -193,18 +193,26 @@ static void update_chksum(struct sk_buff *skb, int headln)
static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
{
+ struct sock *sk = skb->sk;
+ int delta;
+
skb_copy_header(nskb, skb);
skb_put(nskb, skb->len);
memcpy(nskb->data, skb->data, headln);
- update_chksum(nskb, headln);
nskb->destructor = skb->destructor;
- nskb->sk = skb->sk;
+ nskb->sk = sk;
skb->destructor = NULL;
skb->sk = NULL;
- refcount_add(nskb->truesize - skb->truesize,
- &nskb->sk->sk_wmem_alloc);
+
+ update_chksum(nskb, headln);
+
+ delta = nskb->truesize - skb->truesize;
+ if (likely(delta < 0))
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+ else if (delta)
+ refcount_add(delta, &sk->sk_wmem_alloc);
}
/* This function may be called after the user socket is already
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 523622dc74f8..25b3fb585777 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -290,11 +290,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_sw_free_resources_tx(sk);
}
- if (ctx->rx_conf == TLS_SW) {
- kfree(ctx->rx.rec_seq);
- kfree(ctx->rx.iv);
+ if (ctx->rx_conf == TLS_SW)
tls_sw_free_resources_rx(sk);
- }
#ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
@@ -550,11 +547,14 @@ static struct tls_context *create_ctx(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tls_context *ctx;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
if (!ctx)
return NULL;
icsk->icsk_ulp_data = ctx;
+ ctx->setsockopt = sk->sk_prot->setsockopt;
+ ctx->getsockopt = sk->sk_prot->getsockopt;
+ ctx->sk_proto_close = sk->sk_prot->close;
return ctx;
}
@@ -685,9 +685,6 @@ static int tls_init(struct sock *sk)
rc = -ENOMEM;
goto out;
}
- ctx->setsockopt = sk->sk_prot->setsockopt;
- ctx->getsockopt = sk->sk_prot->getsockopt;
- ctx->sk_proto_close = sk->sk_prot->close;
/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
if (ip_ver == TLSV6 &&
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b9c6ecfbcfea..6848a8196711 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1118,6 +1118,9 @@ void tls_sw_release_resources_rx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ kfree(tls_ctx->rx.rec_seq);
+ kfree(tls_ctx->rx.iv);
+
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c754f3a90a2e..f601933ad728 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -888,7 +888,7 @@ retry:
addr->hash ^= sk->sk_type;
__unix_remove_socket(sk);
- u->addr = addr;
+ smp_store_release(&u->addr, addr);
__unix_insert_socket(&unix_socket_table[addr->hash], sk);
spin_unlock(&unix_table_lock);
err = 0;
@@ -1058,7 +1058,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
err = 0;
__unix_remove_socket(sk);
- u->addr = addr;
+ smp_store_release(&u->addr, addr);
__unix_insert_socket(list, sk);
out_unlock:
@@ -1329,15 +1329,29 @@ restart:
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
- /* copy address information from listening to new sock*/
- if (otheru->addr) {
- refcount_inc(&otheru->addr->refcnt);
- newu->addr = otheru->addr;
- }
+ /* copy address information from listening to new sock
+ *
+ * The contents of *(otheru->addr) and otheru->path
+ * are seen fully set up here, since we have found
+ * otheru in hash under unix_table_lock. Insertion
+ * into the hash chain we'd found it in had been done
+ * in an earlier critical area protected by unix_table_lock,
+ * the same one where we'd set *(otheru->addr) contents,
+ * as well as otheru->path and otheru->addr itself.
+ *
+ * Using smp_store_release() here to set newu->addr
+ * is enough to make those stores, as well as stores
+ * to newu->path visible to anyone who gets newu->addr
+ * by smp_load_acquire(). IOW, the same warranties
+ * as for unix_sock instances bound in unix_bind() or
+ * in unix_autobind().
+ */
if (otheru->path.dentry) {
path_get(&otheru->path);
newu->path = otheru->path;
}
+ refcount_inc(&otheru->addr->refcnt);
+ smp_store_release(&newu->addr, otheru->addr);
/* Set credentials */
copy_peercred(sk, other);
@@ -1451,7 +1465,7 @@ out:
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
- struct unix_sock *u;
+ struct unix_address *addr;
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
int err = 0;
@@ -1466,19 +1480,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
sock_hold(sk);
}
- u = unix_sk(sk);
- unix_state_lock(sk);
- if (!u->addr) {
+ addr = smp_load_acquire(&unix_sk(sk)->addr);
+ if (!addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
err = sizeof(short);
} else {
- struct unix_address *addr = u->addr;
-
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
}
- unix_state_unlock(sk);
sock_put(sk);
out:
return err;
@@ -2071,11 +2081,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
- struct unix_sock *u = unix_sk(sk);
+ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
- if (u->addr) {
- msg->msg_namelen = u->addr->len;
- memcpy(msg->msg_name, u->addr->name, u->addr->len);
+ if (addr) {
+ msg->msg_namelen = addr->len;
+ memcpy(msg->msg_name, addr->name, addr->len);
}
}
@@ -2579,15 +2589,14 @@ static int unix_open_file(struct sock *sk)
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- unix_state_lock(sk);
+ if (!smp_load_acquire(&unix_sk(sk)->addr))
+ return -ENOENT;
+
path = unix_sk(sk)->path;
- if (!path.dentry) {
- unix_state_unlock(sk);
+ if (!path.dentry)
return -ENOENT;
- }
path_get(&path);
- unix_state_unlock(sk);
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
@@ -2828,7 +2837,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s));
- if (u->addr) {
+ if (u->addr) { // under unix_table_lock here
int i, len;
seq_putc(seq, ' ');
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 384c84e83462..3183d9b8ab33 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -10,7 +10,8 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
- struct unix_address *addr = unix_sk(sk)->addr;
+ /* might or might not have unix_table_lock */
+ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
return 0;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5d3cce9e8744..96ab344f17bb 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void)
{
struct virtio_vsock *vsock = virtio_vsock_get();
+ if (!vsock)
+ return VMADDR_CID_ANY;
+
return vsock->guest_cid;
}
@@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
virtio_vsock_update_guest_cid(vsock);
- ret = vsock_core_init(&virtio_transport.transport);
- if (ret < 0)
- goto out_vqs;
-
vsock->rx_buf_nr = 0;
vsock->rx_buf_max_nr = 0;
atomic_set(&vsock->queued_replies, 0);
@@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_unlock(&the_virtio_vsock_mutex);
return 0;
-out_vqs:
- vsock->vdev->config->del_vqs(vsock->vdev);
out:
kfree(vsock);
mutex_unlock(&the_virtio_vsock_mutex);
@@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
flush_work(&vsock->event_work);
flush_work(&vsock->send_pkt_work);
+ /* Reset all connected sockets when the device disappear */
+ vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+
vdev->config->reset(vdev);
mutex_lock(&vsock->rx_lock);
@@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
mutex_lock(&the_virtio_vsock_mutex);
the_virtio_vsock = NULL;
- vsock_core_exit();
mutex_unlock(&the_virtio_vsock_mutex);
vdev->config->del_vqs(vdev);
@@ -702,15 +701,28 @@ static int __init virtio_vsock_init(void)
virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
if (!virtio_vsock_workqueue)
return -ENOMEM;
+
+ ret = vsock_core_init(&virtio_transport.transport);
+ if (ret)
+ goto out_wq;
+
ret = register_virtio_driver(&virtio_vsock_driver);
if (ret)
- destroy_workqueue(virtio_vsock_workqueue);
+ goto out_vci;
+
+ return 0;
+
+out_vci:
+ vsock_core_exit();
+out_wq:
+ destroy_workqueue(virtio_vsock_workqueue);
return ret;
}
static void __exit virtio_vsock_exit(void)
{
unregister_virtio_driver(&virtio_vsock_driver);
+ vsock_core_exit();
destroy_workqueue(virtio_vsock_workqueue);
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3ae3a33da70b..f3f3d06cb6d8 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -662,6 +662,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
*/
static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
{
+ const struct virtio_transport *t;
+ struct virtio_vsock_pkt *reply;
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
.type = le16_to_cpu(pkt->hdr.type),
@@ -672,15 +674,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
return 0;
- pkt = virtio_transport_alloc_pkt(&info, 0,
- le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port),
- le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
- if (!pkt)
+ reply = virtio_transport_alloc_pkt(&info, 0,
+ le64_to_cpu(pkt->hdr.dst_cid),
+ le32_to_cpu(pkt->hdr.dst_port),
+ le64_to_cpu(pkt->hdr.src_cid),
+ le32_to_cpu(pkt->hdr.src_port));
+ if (!reply)
return -ENOMEM;
- return virtio_transport_get_ops()->send_pkt(pkt);
+ t = virtio_transport_get_ops();
+ if (!t) {
+ virtio_transport_free_pkt(reply);
+ return -ENOTCONN;
+ }
+
+ return t->send_pkt(reply);
}
static void virtio_transport_wait_close(struct sock *sk, long timeout)
@@ -778,12 +786,19 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
void virtio_transport_release(struct vsock_sock *vsk)
{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt, *tmp;
struct sock *sk = &vsk->sk;
bool remove_sock = true;
lock_sock(sk);
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
+
+ list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
release_sock(sk);
if (remove_sock)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index cb332adb84cd..c3d5ab01fba7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -264,6 +264,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
}
static int
+vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src,
+ struct sockaddr_vm *dst,
+ enum vmci_transport_packet_type type,
+ u64 size,
+ u64 mode,
+ struct vmci_transport_waiting_info *wait,
+ u16 proto,
+ struct vmci_handle handle)
+{
+ struct vmci_transport_packet *pkt;
+ int err;
+
+ pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size,
+ mode, wait, proto, handle,
+ true);
+ kfree(pkt);
+
+ return err;
+}
+
+static int
vmci_transport_send_control_pkt(struct sock *sk,
enum vmci_transport_packet_type type,
u64 size,
@@ -272,9 +297,7 @@ vmci_transport_send_control_pkt(struct sock *sk,
u16 proto,
struct vmci_handle handle)
{
- struct vmci_transport_packet *pkt;
struct vsock_sock *vsk;
- int err;
vsk = vsock_sk(sk);
@@ -284,17 +307,10 @@ vmci_transport_send_control_pkt(struct sock *sk,
if (!vsock_addr_bound(&vsk->remote_addr))
return -EINVAL;
- pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
- return -ENOMEM;
-
- err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
- &vsk->remote_addr, type, size,
- mode, wait, proto, handle,
- true);
- kfree(pkt);
-
- return err;
+ return vmci_transport_alloc_send_control_pkt(&vsk->local_addr,
+ &vsk->remote_addr,
+ type, size, mode,
+ wait, proto, handle);
}
static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
@@ -312,12 +328,29 @@ static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
static int vmci_transport_send_reset(struct sock *sk,
struct vmci_transport_packet *pkt)
{
+ struct sockaddr_vm *dst_ptr;
+ struct sockaddr_vm dst;
+ struct vsock_sock *vsk;
+
if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
return 0;
- return vmci_transport_send_control_pkt(sk,
- VMCI_TRANSPORT_PACKET_TYPE_RST,
- 0, 0, NULL, VSOCK_PROTO_INVALID,
- VMCI_INVALID_HANDLE);
+
+ vsk = vsock_sk(sk);
+
+ if (!vsock_addr_bound(&vsk->local_addr))
+ return -EINVAL;
+
+ if (vsock_addr_bound(&vsk->remote_addr)) {
+ dst_ptr = &vsk->remote_addr;
+ } else {
+ vsock_addr_init(&dst, pkt->dg.src.context,
+ pkt->src_port);
+ dst_ptr = &dst;
+ }
+ return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr,
+ VMCI_TRANSPORT_PACKET_TYPE_RST,
+ 0, 0, NULL, VSOCK_PROTO_INVALID,
+ VMCI_INVALID_HANDLE);
}
static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
@@ -1618,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work)
static void vmci_transport_destruct(struct vsock_sock *vsk)
{
+ /* transport can be NULL if we hit a failure at init() time */
+ if (!vmci_trans(vsk))
+ return;
+
/* Ensure that the detach callback doesn't use the sk/vsk
* we are about to destruct.
*/
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 176edfefcbaa..c6711ead5e59 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8993,8 +8993,10 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
int r = validate_pae_over_nl80211(rdev, info);
- if (r < 0)
+ if (r < 0) {
+ kzfree(connkeys);
return r;
+ }
ibss.control_port_over_nl80211 = true;
}
@@ -13390,7 +13392,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEAUTHENTICATE,
@@ -13441,7 +13444,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
@@ -13449,7 +13453,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DISCONNECT,
@@ -13478,7 +13483,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMKSA,
@@ -13830,7 +13836,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_SET_QOS_MAP,
@@ -13885,7 +13892,8 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_set_pmk,
.policy = nl80211_policy,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMK,
@@ -15433,6 +15441,11 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->chandef = *chandef;
wdev->preset_chandef = *chandef;
+
+ if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ !WARN_ON(!wdev->current_bss))
+ wdev->current_bss->pub.channel = chandef->chan;
+
nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_NOTIFY, 0);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 24cfa2776f50..8a47297ff206 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1249,7 +1249,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd)
* definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"),
* however it is safe for now to assume that a frequency rule should not be
* part of a frequency's band if the start freq or end freq are off by more
- * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the
+ * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the
* 60 GHz band.
* This resolution can be lowered and should be considered as we add
* regulatory rule support for other "bands".
@@ -1264,7 +1264,7 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
* with the Channel starting frequency above 45 GHz.
*/
u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ?
- 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
+ 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
if (abs(freq_khz - freq_range->start_freq_khz) <= limit)
return true;
if (abs(freq_khz - freq_range->end_freq_khz) <= limit)
@@ -1287,6 +1287,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1,
return dfs_region1;
}
+static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1,
+ const struct ieee80211_wmm_ac *wmm_ac2,
+ struct ieee80211_wmm_ac *intersect)
+{
+ intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min);
+ intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max);
+ intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot);
+ intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn);
+}
+
/*
* Helper for regdom_intersect(), this does the real
* mathematical intersection fun
@@ -1301,6 +1311,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
struct ieee80211_freq_range *freq_range;
const struct ieee80211_power_rule *power_rule1, *power_rule2;
struct ieee80211_power_rule *power_rule;
+ const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2;
+ struct ieee80211_wmm_rule *wmm_rule;
u32 freq_diff, max_bandwidth1, max_bandwidth2;
freq_range1 = &rule1->freq_range;
@@ -1311,6 +1323,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
power_rule2 = &rule2->power_rule;
power_rule = &intersected_rule->power_rule;
+ wmm_rule1 = &rule1->wmm_rule;
+ wmm_rule2 = &rule2->wmm_rule;
+ wmm_rule = &intersected_rule->wmm_rule;
+
freq_range->start_freq_khz = max(freq_range1->start_freq_khz,
freq_range2->start_freq_khz);
freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
@@ -1354,6 +1370,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
rule2->dfs_cac_ms);
+ if (rule1->has_wmm && rule2->has_wmm) {
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ reg_wmm_rules_intersect(&wmm_rule1->client[ac],
+ &wmm_rule2->client[ac],
+ &wmm_rule->client[ac]);
+ reg_wmm_rules_intersect(&wmm_rule1->ap[ac],
+ &wmm_rule2->ap[ac],
+ &wmm_rule->ap[ac]);
+ }
+
+ intersected_rule->has_wmm = true;
+ } else if (rule1->has_wmm) {
+ *wmm_rule = *wmm_rule1;
+ intersected_rule->has_wmm = true;
+ } else if (rule2->has_wmm) {
+ *wmm_rule = *wmm_rule2;
+ intersected_rule->has_wmm = true;
+ } else {
+ intersected_rule->has_wmm = false;
+ }
+
if (!is_valid_reg_rule(intersected_rule))
return -EINVAL;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b7997..f7f53f9ae7ef 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb)
unsigned int lci = 1;
struct sock *sk;
- read_lock_bh(&x25_list_lock);
-
- while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+ while ((sk = x25_find_socket(lci, nb)) != NULL) {
sock_put(sk);
if (++lci == 4096) {
lci = 0;
break;
}
+ cond_resched();
}
- read_unlock_bh(&x25_list_lock);
return lci;
}
@@ -681,8 +679,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
int len, i, rc = 0;
- if (!sock_flag(sk, SOCK_ZAPPED) ||
- addr_len != sizeof(struct sockaddr_x25) ||
+ if (addr_len != sizeof(struct sockaddr_x25) ||
addr->sx25_family != AF_X25) {
rc = -EINVAL;
goto out;
@@ -697,9 +694,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
lock_sock(sk);
- x25_sk(sk)->source_addr = addr->sx25_addr;
- x25_insert_socket(sk);
- sock_reset_flag(sk, SOCK_ZAPPED);
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ x25_sk(sk)->source_addr = addr->sx25_addr;
+ x25_insert_socket(sk);
+ sock_reset_flag(sk, SOCK_ZAPPED);
+ } else {
+ rc = -EINVAL;
+ }
release_sock(sk);
SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
out:
@@ -815,8 +816,13 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc)
+ if (rc) {
+ read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
+ x25->neighbour = NULL;
+ read_unlock_bh(&x25_list_lock);
+ x25->state = X25_STATE_0;
+ }
out_put_route:
x25_route_put(rt);
out:
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bfe2dbea480b..a3b037fbfecd 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -152,9 +152,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
static void xdp_umem_release(struct xdp_umem *umem)
{
- struct task_struct *task;
- struct mm_struct *mm;
-
xdp_umem_clear_dev(umem);
if (umem->fq) {
@@ -169,21 +166,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
xdp_umem_unpin_pages(umem);
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
-
- mmput(mm);
kfree(umem->pages);
umem->pages = NULL;
xdp_umem_unaccount_pages(umem);
-out:
kfree(umem);
}
@@ -312,7 +298,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (size_chk < 0)
return -EINVAL;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
umem->address = (unsigned long)addr;
umem->props.chunk_mask = ~((u64)chunk_size - 1);
umem->props.size = size;
@@ -328,7 +313,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
err = xdp_umem_account_pages(umem);
if (err)
- goto out;
+ return err;
err = xdp_umem_pin_pages(umem);
if (err)
@@ -347,8 +332,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
out_account:
xdp_umem_unaccount_pages(umem);
-out:
- put_pid(umem->pid);
return err;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index be3520e429c9..790b514f86b6 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -346,6 +346,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
+ skb_dst_force(skb);
+ if (!skb_dst(skb)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
+ goto drop;
+ }
+
lock:
spin_lock(&x->lock);
@@ -385,7 +391,6 @@ lock:
XFRM_SKB_CB(skb)->seq.input.low = seq;
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
- skb_dst_force(skb);
dev_hold(skb->dev);
if (crypto_done)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 6f05e831a73e..555ee2aca6c0 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
return NULL;
}
-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+ unsigned short family)
{
struct xfrmi_net *xfrmn;
- int ifindex;
struct xfrm_if *xi;
+ int ifindex = 0;
- if (!skb->dev)
+ if (!secpath_exists(skb) || !skb->dev)
return NULL;
- xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
- ifindex = skb->dev->ifindex;
+ switch (family) {
+ case AF_INET6:
+ ifindex = inet6_sdif(skb);
+ break;
+ case AF_INET:
+ ifindex = inet_sdif(skb);
+ break;
+ }
+ if (!ifindex)
+ ifindex = skb->dev->ifindex;
+
+ xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
if (ifindex == xi->dev->ifindex &&
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 261995d37ced..6d20fbcde000 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -102,6 +102,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
skb_dst_force(skb);
if (!skb_dst(skb)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ err = -EHOSTUNREACH;
goto error_nolock;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 119a427d9b2b..ce1b262ce964 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1628,7 +1628,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
- __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+ __u32 mark = 0;
+
+ if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
+ mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
@@ -2336,9 +2339,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
ifcb = xfrm_if_get_cb();
if (ifcb) {
- xi = ifcb->decode_session(skb);
- if (xi)
+ xi = ifcb->decode_session(skb, family);
+ if (xi) {
if_id = xi->p.if_id;
+ net = xi->net;
+ }
}
rcu_read_unlock();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b669262682c9..11e09eb138d6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -426,7 +426,13 @@ static void xfrm_put_mode(struct xfrm_mode *mode)
module_put(mode->owner);
}
-static void xfrm_state_gc_destroy(struct xfrm_state *x)
+void xfrm_state_free(struct xfrm_state *x)
+{
+ kmem_cache_free(xfrm_state_cache, x);
+}
+EXPORT_SYMBOL(xfrm_state_free);
+
+static void ___xfrm_state_destroy(struct xfrm_state *x)
{
tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
@@ -452,7 +458,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
}
xfrm_dev_state_free(x);
security_xfrm_state_free(x);
- kmem_cache_free(xfrm_state_cache, x);
+ xfrm_state_free(x);
}
static void xfrm_state_gc_task(struct work_struct *work)
@@ -468,7 +474,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
- xfrm_state_gc_destroy(x);
+ ___xfrm_state_destroy(x);
}
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -592,14 +598,19 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
}
EXPORT_SYMBOL(xfrm_state_alloc);
-void __xfrm_state_destroy(struct xfrm_state *x)
+void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
- spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &xfrm_state_gc_list);
- spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&xfrm_state_gc_work);
+ if (sync) {
+ synchronize_rcu();
+ ___xfrm_state_destroy(x);
+ } else {
+ spin_lock_bh(&xfrm_state_gc_lock);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+ spin_unlock_bh(&xfrm_state_gc_lock);
+ schedule_work(&xfrm_state_gc_work);
+ }
}
EXPORT_SYMBOL(__xfrm_state_destroy);
@@ -702,7 +713,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
{
int i, err = 0, cnt = 0;
@@ -724,7 +735,10 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
- xfrm_state_put(x);
+ if (sync)
+ xfrm_state_put_sync(x);
+ else
+ xfrm_state_put(x);
if (!err)
cnt++;
@@ -788,7 +802,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
{
spin_lock_bh(&net->xfrm.xfrm_state_lock);
si->sadcnt = net->xfrm.state_num;
- si->sadhcnt = net->xfrm.state_hmask;
+ si->sadhcnt = net->xfrm.state_hmask + 1;
si->sadhmcnt = xfrm_state_hashmax;
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
@@ -2211,7 +2225,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
if (atomic_read(&t->tunnel_users) == 2)
xfrm_state_delete(t);
atomic_dec(&t->tunnel_users);
- xfrm_state_put(t);
+ xfrm_state_put_sync(t);
x->tunnel = NULL;
}
}
@@ -2371,8 +2385,8 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&xfrm_state_gc_work);
+ xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index df7ca2dabc48..2122f89f6155 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
ret = verify_policy_dir(p->dir);
if (ret)
return ret;
- if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir))
+ if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
return -EINVAL;
return 0;
@@ -1488,10 +1488,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
if (!ut[i].family)
ut[i].family = family;
- if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
- (ut[i].family != prev_family))
- return -EINVAL;
-
+ switch (ut[i].mode) {
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ break;
+ default:
+ if (ut[i].family != prev_family)
+ return -EINVAL;
+ break;
+ }
if (ut[i].mode >= XFRM_MODE_MAX)
return -EINVAL;
@@ -1508,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
return -EINVAL;
}
- switch (ut[i].id.proto) {
- case IPPROTO_AH:
- case IPPROTO_ESP:
- case IPPROTO_COMP:
-#if IS_ENABLED(CONFIG_IPV6)
- case IPPROTO_ROUTING:
- case IPPROTO_DSTOPTS:
-#endif
- case IPSEC_PROTO_ANY:
- break;
- default:
+ if (!xfrm_id_proto_valid(ut[i].id.proto))
return -EINVAL;
- }
-
}
return 0;
@@ -1927,7 +1920,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
int err;
- err = xfrm_state_flush(net, p->proto, true);
+ err = xfrm_state_flush(net, p->proto, true, false);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
@@ -2288,13 +2281,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
}
- kfree(x);
+ xfrm_state_free(x);
kfree(xp);
return 0;
free_state:
- kfree(x);
+ xfrm_state_free(x);
nomem:
return err;
}