From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 13:03:51 +0000 Subject: net: sk_sleep() helper Define a new function to return the waitqueue of a "struct sock". static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } Replace all read occurrences of sk_sleep with a call to this function. Needed for a future RCU conversion. sk_sleep won't be a directly available field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index abba3cc81f12..85d6420f8404 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -246,8 +246,8 @@ static void macvtap_sock_write_space(struct sock *sk) !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 4a4771a58e13b46bfdc999fe481e550f8c6937ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Apr 2010 22:20:06 +0000 Subject: net: use sk_sleep() Commit aa395145 (net: sk_sleep() helper) missed three files in the conversion. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/macvtap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 85d6420f8404..d97e1fd234ba 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -181,7 +181,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) return -ENOLINK; skb_queue_tail(&q->sk.sk_receive_queue, skb); - wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); + wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); return 0; } @@ -562,7 +562,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, struct sk_buff *skb; ssize_t ret = 0; - add_wait_queue(q->sk.sk_sleep, &wait); + add_wait_queue(sk_sleep(&q->sk), &wait); while (len) { current->state = TASK_INTERRUPTIBLE; @@ -587,7 +587,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, } current->state = TASK_RUNNING; - remove_wait_queue(q->sk.sk_sleep, &wait); + remove_wait_queue(sk_sleep(&q->sk), &wait); return ret; } -- cgit v1.2.3 From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Apr 2010 11:01:49 +0000 Subject: net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 
3) Respect RCU grace period when freeing a "struct socket_wq" 4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside an rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to: - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to wake up tasks if needed. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions: macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They don't need RCU freeing. Some cleanups or followups are probably needed (possible sk_callback_lock conversion to a spinlock, for example...). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d97e1fd234ba..1c4110df343e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -37,6 +37,7 @@ struct macvtap_queue { struct sock sk; struct socket sock; + struct socket_wq wq; struct macvlan_dev *vlan; struct file *file; unsigned int flags; @@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { static void macvtap_sock_write_space(struct sock *sk) { + wait_queue_head_t *wqueue; + if (!sock_writeable(sk) || !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file 
*file) @@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file) if (!q) goto out; - init_waitqueue_head(&q->sock.wait); + q->sock.wq = &q->wq; + init_waitqueue_head(&q->wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; @@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) goto out; mask = 0; - poll_wait(file, &q->sock.wait, wait); + poll_wait(file, &q->wq.wait, wait); if (!skb_queue_empty(&q->sk.sk_receive_queue)) mask |= POLLIN | POLLRDNORM; -- cgit v1.2.3 From 55afbd0810922afe456f9e4e3abc84d69d3f8a15 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 29 Apr 2010 13:50:48 +0300 Subject: macvtap: add ioctl to modify vnet header size This adds TUNSETVNETHDRSZ/TUNGETVNETHDRSZ support to macvtap. Signed-off-by: Michael S. Tsirkin Acked-by: Arnd Bergmann Acked-by: David S. Miller --- drivers/net/macvtap.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'drivers/net/macvtap.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 1c4110df343e..a8a94e2f6ddc 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -38,6 +38,7 @@ struct macvtap_queue { struct sock sk; struct socket sock; struct socket_wq wq; + int vnet_hdr_sz; struct macvlan_dev *vlan; struct file *file; unsigned int flags; @@ -285,6 +286,7 @@ static int macvtap_open(struct inode *inode, struct file *file) sock_init_data(&q->sock, &q->sk); q->sk.sk_write_space = macvtap_sock_write_space; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; + q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); err = macvtap_set_queue(dev, file, q); if (err) @@ -445,14 +447,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, int vnet_hdr_len = 0; if (q->flags & IFF_VNET_HDR) { - vnet_hdr_len = sizeof(vnet_hdr); + vnet_hdr_len = q->vnet_hdr_sz; err = -EINVAL; if ((len -= vnet_hdr_len) < 0) goto err; err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 
0, - vnet_hdr_len); + sizeof(vnet_hdr)); if (err < 0) goto err; if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && @@ -534,7 +536,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, if (q->flags & IFF_VNET_HDR) { struct virtio_net_hdr vnet_hdr; - vnet_hdr_len = sizeof (vnet_hdr); + vnet_hdr_len = q->vnet_hdr_sz; if ((len -= vnet_hdr_len) < 0) return -EINVAL; @@ -542,7 +544,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, if (ret) return ret; - if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, vnet_hdr_len)) + if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr))) return -EFAULT; } @@ -627,6 +629,8 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, struct ifreq __user *ifr = argp; unsigned int __user *up = argp; unsigned int u; + int __user *sp = argp; + int s; int ret; switch (cmd) { @@ -672,6 +676,21 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = u; return 0; + case TUNGETVNETHDRSZ: + s = q->vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + q->vnet_hdr_sz = s; + return 0; + case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | -- cgit v1.2.3