summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c119
1 files changed, 58 insertions, 61 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba3c..882436da9a3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-kmem_cache_t *tcp_openreq_cachep;
kmem_cache_t *tcp_bucket_cachep;
kmem_cache_t *tcp_timewait_cachep;
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
poll_table *wait)
{
- return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
+ return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
}
/*
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_listen_opt *lopt;
+ int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
+
+ if (rc != 0)
+ return rc;
sk->sk_max_ack_backlog = 0;
sk->sk_ack_backlog = 0;
- tp->accept_queue = tp->accept_queue_tail = NULL;
- rwlock_init(&tp->syn_wait_lock);
tcp_delack_init(tp);
- lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
- if (!lopt)
- return -ENOMEM;
-
- memset(lopt, 0, sizeof(struct tcp_listen_opt));
- for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
- if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
- break;
- get_random_bytes(&lopt->hash_rnd, 4);
-
- write_lock_bh(&tp->syn_wait_lock);
- tp->listen_opt = lopt;
- write_unlock_bh(&tp->syn_wait_lock);
-
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
}
sk->sk_state = TCP_CLOSE;
- write_lock_bh(&tp->syn_wait_lock);
- tp->listen_opt = NULL;
- write_unlock_bh(&tp->syn_wait_lock);
- kfree(lopt);
+ reqsk_queue_destroy(&tp->accept_queue);
return -EADDRINUSE;
}
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
static void tcp_listen_stop (struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_listen_opt *lopt = tp->listen_opt;
- struct open_request *acc_req = tp->accept_queue;
- struct open_request *req;
+ struct listen_sock *lopt;
+ struct request_sock *acc_req;
+ struct request_sock *req;
int i;
tcp_delete_keepalive_timer(sk);
/* make all the listen_opt local to us */
- write_lock_bh(&tp->syn_wait_lock);
- tp->listen_opt = NULL;
- write_unlock_bh(&tp->syn_wait_lock);
- tp->accept_queue = tp->accept_queue_tail = NULL;
+ lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
+ acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
if (lopt->qlen) {
for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
lopt->qlen--;
- tcp_openreq_free(req);
+ reqsk_free(req);
/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
sock_put(child);
sk_acceptq_removed(sk);
- tcp_openreq_fastfree(req);
+ __reqsk_free(req);
}
BUG_TRAP(!sk->sk_ack_backlog);
}
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
cleanup_rbuf(sk, copied);
- if (tp->ucopy.task == user_recv) {
+ if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
/* Install new reader */
if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
prepare_to_wait_exclusive(sk->sk_sleep, &wait,
TASK_INTERRUPTIBLE);
release_sock(sk);
- if (!tp->accept_queue)
+ if (reqsk_queue_empty(&tp->accept_queue))
timeo = schedule_timeout(timeo);
lock_sock(sk);
err = 0;
- if (tp->accept_queue)
+ if (!reqsk_queue_empty(&tp->accept_queue))
break;
err = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
struct sock *tcp_accept(struct sock *sk, int flags, int *err)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct open_request *req;
struct sock *newsk;
int error;
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
*/
error = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
- goto out;
+ goto out_err;
/* Find already established connection */
- if (!tp->accept_queue) {
+ if (reqsk_queue_empty(&tp->accept_queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
if (!timeo)
- goto out;
+ goto out_err;
error = wait_for_connect(sk, timeo);
if (error)
- goto out;
+ goto out_err;
}
- req = tp->accept_queue;
- if ((tp->accept_queue = req->dl_next) == NULL)
- tp->accept_queue_tail = NULL;
-
- newsk = req->sk;
- sk_acceptq_removed(sk);
- tcp_openreq_fastfree(req);
+ newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
- release_sock(sk);
- return newsk;
-
out:
release_sock(sk);
+ return newsk;
+out_err:
+ newsk = NULL;
*err = error;
- return NULL;
+ goto out;
}
/*
@@ -1953,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
return tp->af_specific->setsockopt(sk, level, optname,
optval, optlen);
+ /* This is a string value all the others are int's */
+ if (optname == TCP_CONGESTION) {
+ char name[TCP_CA_NAME_MAX];
+
+ if (optlen < 1)
+ return -EINVAL;
+
+ val = strncpy_from_user(name, optval,
+ min(TCP_CA_NAME_MAX-1, optlen));
+ if (val < 0)
+ return -EFAULT;
+ name[val] = 0;
+
+ lock_sock(sk);
+ err = tcp_set_congestion_control(tp, name);
+ release_sock(sk);
+ return err;
+ }
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -2237,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
case TCP_QUICKACK:
val = !tp->ack.pingpong;
break;
+
+ case TCP_CONGESTION:
+ if (get_user(len, optlen))
+ return -EFAULT;
+ len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, tp->ca_ops->name, len))
+ return -EFAULT;
+ return 0;
default:
return -ENOPROTOOPT;
};
@@ -2250,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
extern void __skb_cb_too_small_for_tcp(int, int);
-extern void tcpdiag_init(void);
+extern struct tcp_congestion_ops tcp_reno;
static __initdata unsigned long thash_entries;
static int __init set_thash_entries(char *str)
@@ -2271,13 +2274,6 @@ void __init tcp_init(void)
__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
sizeof(skb->cb));
- tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
- sizeof(struct open_request),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!tcp_openreq_cachep)
- panic("tcp_init: Cannot alloc open_request cache.");
-
tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
sizeof(struct tcp_bind_bucket),
0, SLAB_HWCACHE_ALIGN,
@@ -2338,7 +2334,7 @@ void __init tcp_init(void)
(tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
order++)
;
- if (order > 4) {
+ if (order >= 4) {
sysctl_local_port_range[0] = 32768;
sysctl_local_port_range[1] = 61000;
sysctl_tcp_max_tw_buckets = 180000;
@@ -2366,6 +2362,8 @@ void __init tcp_init(void)
printk(KERN_INFO "TCP: Hash tables configured "
"(established %d bind %d)\n",
tcp_ehash_size << 1, tcp_bhash_size);
+
+ tcp_register_congestion_control(&tcp_reno);
}
EXPORT_SYMBOL(tcp_accept);
@@ -2374,7 +2372,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
EXPORT_SYMBOL(tcp_disconnect);
EXPORT_SYMBOL(tcp_getsockopt);
EXPORT_SYMBOL(tcp_ioctl);
-EXPORT_SYMBOL(tcp_openreq_cachep);
EXPORT_SYMBOL(tcp_poll);
EXPORT_SYMBOL(tcp_read_sock);
EXPORT_SYMBOL(tcp_recvmsg);