From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 18:56:21 -0700 Subject: inet: Sanitize inet{,6} protocol demux. Don't pretend that inet_protos[] and inet6_protos[] are hashes, they are just straight arrays. Remove all unnecessary hash masking. Document MAX_INET_PROTOS. Use RAW_HTABLE_SIZE when appropriate. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/protocol.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index 875f4895b033..a1b1b530c338 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -29,8 +29,11 @@ #include #endif -#define MAX_INET_PROTOS 256 /* Must be a power of 2 */ - +/* This is one larger than the largest protocol value that can be + * found in an ipv4 or ipv6 header. Since in both cases the protocol + * value is presented in a __u8, this is defined to be 256. + */ +#define MAX_INET_PROTOS 256 /* This is used to register protocols. */ struct net_protocol { -- cgit v1.2.3 From 41063e9dd11956f2d285e12e4342e1d232ba0ea2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 21:22:05 -0700 Subject: ipv4: Early TCP socket demux. Input packet processing for local sockets involves two major demuxes. One for the route and one for the socket. But we can optimize this down to one demux for certain kinds of local sockets. Currently we only do this for established TCP sockets, but it could at least in theory be expanded to other kinds of connections. If a TCP socket is established then its identity is fully specified. This means that whatever input route was used during the three-way handshake must work equally well for the rest of the connection since the keys will not change. Once we move to established state, we cache the receive packet's input route to use later. 
Like the existing cached route in sk->sk_dst_cache used for output packets, we have to check for route invalidations using dst->obsolete and dst->ops->check(). Early demux occurs outside of a socket locked section, so when a route invalidation occurs we defer the fixup of sk->sk_rx_dst until we are actually inside of established state packet processing and thus have the socket locked. Signed-off-by: David S. Miller --- include/net/protocol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index a1b1b530c338..967b926cbfb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,6 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); -- cgit v1.2.3 From c074da2810c118b3812f32d6754bd9ead2f169e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Jun 2012 23:14:15 +0000 Subject: ipv4: tcp: dont cache unconfirmed intput dst DDOS synflood attacks hit the IP route cache badly. On typical machines, this cache is allowed to hold up to 8 million dst entries, 256 bytes for each, for a total of 2GB of memory. rt_garbage_collect() triggers and tries to clean things up. Eventually the route cache is disabled but the machine is under fire and might OOM and crash. This patch exploits the new TCP early demux, to set a nocache boolean in case the incoming TCP frame is for a not yet ESTABLISHED or TIMEWAIT socket. This 'nocache' boolean is then used in case the dst entry is not found in the route cache, to create an unhashed dst entry (DST_NOCACHE). SYN-cookie ACKs that are sent use a similar mechanism (ipv4: tcp: dont cache output dst for syncookies), so after this patch, a machine is able to absorb a DDOS synflood attack without polluting its IP route cache. 
Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb1..7cfc8f76914d 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb, bool *nocache); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); -- cgit v1.2.3 From c10237e077cef50e925f052e49f3b4fead9d71f9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 17:05:06 -0700 Subject: Revert "ipv4: tcp: dont cache unconfirmed intput dst" This reverts commit c074da2810c118b3812f32d6754bd9ead2f169e7. This change has several unwanted side effects: 1) Sockets will cache the DST_NOCACHE route in sk->sk_rx_dst and we'll thus never create a real cached route. 2) All TCP traffic will use DST_NOCACHE and never use the routing cache at all. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index 7cfc8f76914d..967b926cbfb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb, bool *nocache); + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); -- cgit v1.2.3 From 160eb5a6b14ca2eab5c598bdbbb24c24624bad34 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 27 Jun 2012 22:01:22 -0700 Subject: ipv4: Kill early demux method return value. It's completely unnecessary. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb1..057f2d315567 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + void (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); -- cgit v1.2.3 From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/protocol.h') diff --git a/include/net/protocol.h b/include/net/protocol.h index 057f2d315567..929528c73fe8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -52,6 +52,8 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { + void (*early_demux)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, -- cgit v1.2.3