From ee89bab14e857678f83a71ee99e575b0fdbb58d4 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 9 Aug 2012 22:14:56 +0000 Subject: net: move and rename netif_notify_peers() I believe net/core/dev.c is a better place for netif_notify_peers(), because other net event notify functions also stay in this file. And rename it to netdev_notify_peers(). Cc: David S. Miller Cc: Ian Campbell Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9db4f33407f..8d4b7316c734 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2246,8 +2246,6 @@ extern void netif_carrier_on(struct net_device *dev); extern void netif_carrier_off(struct net_device *dev); -extern void netif_notify_peers(struct net_device *dev); - /** * netif_dormant_on - mark device as dormant. * @dev: network device @@ -2596,6 +2594,7 @@ extern void __dev_set_rx_mode(struct net_device *dev); extern int dev_set_promiscuity(struct net_device *dev, int inc); extern int dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); +extern void netdev_notify_peers(struct net_device *dev); extern int netdev_bonding_change(struct net_device *dev, unsigned long event); extern void netdev_features_change(struct net_device *dev); -- cgit v1.2.3 From b7bc2a5b5bd99b216c3e5fe68c7f45c684ab5745 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 9 Aug 2012 22:14:57 +0000 Subject: net: remove netdev_bonding_change() I don't see any benefit to using netdev_bonding_change() rather than using call_netdevice_notifiers() directly. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8d4b7316c734..1d6ab69c1f3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2595,8 +2595,6 @@ extern int dev_set_promiscuity(struct net_device *dev, int inc); extern int dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); extern void netdev_notify_peers(struct net_device *dev); -extern int netdev_bonding_change(struct net_device *dev, - unsigned long event); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); -- cgit v1.2.3 From 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2012 17:19:46 +0000 Subject: net: remove delay at device dismantle I noticed an extra one second delay in device dismantle, tracked down to a call to dst_dev_event() while some call_rcu() are still in RCU queues. These call_rcu() were posted by rt_free(struct rtable *rt) calls. We then wait a little (but one second) in netdev_wait_allrefs() before kicking again NETDEV_UNREGISTER. As the call_rcu() are now completed, dst_dev_event() can do the needed device swap on busy dst. To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called after a rcu_barrier(), but outside of RTNL lock. Use NETDEV_UNREGISTER_FINAL with care ! Change dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL Also remove NETDEV_UNREGISTER_BATCH, as it's not used anymore after IP cache removal. With help from Gao feng Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Cc: "Eric W. Biederman" Cc: Gao feng Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4936f09a9333..9ad7fa8c10e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1553,7 +1553,7 @@ struct packet_type { #define NETDEV_PRE_TYPE_CHANGE 0x000E #define NETDEV_POST_TYPE_CHANGE 0x000F #define NETDEV_POST_INIT 0x0010 -#define NETDEV_UNREGISTER_BATCH 0x0011 +#define NETDEV_UNREGISTER_FINAL 0x0011 #define NETDEV_RELEASE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 #define NETDEV_JOIN 0x0014 -- cgit v1.2.3 From 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Aug 2012 22:16:51 +0100 Subject: net: Set device operstate at registration time The operstate of a device is initially IF_OPER_UNKNOWN and is updated asynchronously by linkwatch after each change of carrier state reported by the driver. The default carrier state of a net device is on, and this will never be changed on drivers that do not support carrier detection, thus the operstate remains IF_OPER_UNKNOWN. For devices that do support carrier detection, the driver must set the carrier state to off initially, then poll the hardware state when the device is opened. However, we must not activate linkwatch for an unregistered device, and commit b473001 ('net: Do not fire linkwatch events until the device is registered.') ensured that we don't. But this means that the operstate for many devices that support carrier detection remains IF_OPER_UNKNOWN when it should be IF_OPER_DOWN. The same issue exists with the dormant state. The proper initialisation sequence, avoiding a race with opening of the device, is: rtnl_lock(); rc = register_netdevice(dev); if (rc) goto out_unlock; netif_carrier_off(dev); /* or netif_dormant_on(dev) */ rtnl_unlock(); but it seems silly that this should have to be repeated in so many drivers. 
Further, the operstate seen immediately after opening the device may still be IF_OPER_UNKNOWN due to the asynchronous nature of linkwatch. Commit 22604c8 ('net: Fix for initial link state in 2.6.28') attempted to fix this by setting the operstate synchronously, but it was reverted as it could lead to deadlock. This initialises the operstate synchronously at registration time only. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ad7fa8c10e0..ccac82e61604 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2227,6 +2227,7 @@ static inline void dev_hold(struct net_device *dev) * kind of lower layer not just hardware media. */ +extern void linkwatch_init_dev(struct net_device *dev); extern void linkwatch_fire_event(struct net_device *dev); extern void linkwatch_forget_dev(struct net_device *dev); -- cgit v1.2.3 From 23d3b8bfb8eb20e7d96afa09991e6a5ed1c83164 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Sep 2012 01:02:56 +0000 Subject: net: qdisc busylock needs lockdep annotations It seems we need to provide ability for stacked devices to use specific lock_class_key for sch->busylock We could instead default l2tpeth tx_queue_len to 0 (no qdisc), but a user might use a qdisc anyway. (So same fixes are probably needed on non LLTX stacked drivers) Noticed while stressing L2TPV3 setup : ====================================================== [ INFO: possible circular locking dependency detected ] 3.6.0-rc3+ #788 Not tainted ------------------------------------------------------- netperf/4660 is trying to acquire lock: (l2tpsock){+.-...}, at: [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] but task is already holding lock: (&(&sch->busylock)->rlock){+.-...}, at: [] dev_queue_xmit+0xd75/0xe00 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&(&sch->busylock)->rlock){+.-...}: [] lock_acquire+0x90/0x200 [] _raw_spin_lock_irqsave+0x4c/0x60 [] __wake_up+0x32/0x70 [] tty_wakeup+0x3e/0x80 [] pty_write+0x73/0x80 [] tty_put_char+0x3c/0x40 [] process_echoes+0x142/0x330 [] n_tty_receive_buf+0x8fb/0x1230 [] flush_to_ldisc+0x142/0x1c0 [] process_one_work+0x198/0x760 [] worker_thread+0x186/0x4b0 [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 -> #0 (l2tpsock){+.-...}: [] __lock_acquire+0x1628/0x1b10 [] lock_acquire+0x90/0x200 [] _raw_spin_lock+0x41/0x50 [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [] dev_hard_start_xmit+0x502/0xa70 [] sch_direct_xmit+0xfe/0x290 [] dev_queue_xmit+0x1e5/0xe00 [] ip_finish_output+0x3d0/0x890 [] ip_output+0x59/0xf0 [] ip_local_out+0x2d/0xa0 [] ip_queue_xmit+0x1c3/0x680 [] tcp_transmit_skb+0x402/0xa60 [] tcp_write_xmit+0x1f4/0xa30 [] tcp_push_one+0x30/0x40 [] tcp_sendmsg+0xe82/0x1040 [] inet_sendmsg+0x125/0x230 [] sock_sendmsg+0xdc/0xf0 [] sys_sendto+0xfe/0x130 [] system_call_fastpath+0x16/0x1b Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&sch->busylock)->rlock); lock(l2tpsock); lock(&(&sch->busylock)->rlock); lock(l2tpsock); *** DEADLOCK *** 5 locks held by netperf/4660: #0: (sk_lock-AF_INET){+.+.+.}, at: [] tcp_sendmsg+0x2c/0x1040 #1: (rcu_read_lock){.+.+..}, at: [] ip_queue_xmit+0x0/0x680 #2: (rcu_read_lock_bh){.+....}, at: [] ip_finish_output+0x135/0x890 #3: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x0/0xe00 #4: (&(&sch->busylock)->rlock){+.-...}, at: [] dev_queue_xmit+0xd75/0xe00 stack backtrace: Pid: 4660, comm: netperf Not tainted 3.6.0-rc3+ #788 Call Trace: [] print_circular_bug+0x1fb/0x20c [] __lock_acquire+0x1628/0x1b10 [] ? check_usage+0x9b/0x4d0 [] ? __lock_acquire+0x2e4/0x1b10 [] lock_acquire+0x90/0x200 [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] _raw_spin_lock+0x41/0x50 [] ? 
l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [] dev_hard_start_xmit+0x502/0xa70 [] ? dev_hard_start_xmit+0x5e/0xa70 [] ? dev_queue_xmit+0x141/0xe00 [] sch_direct_xmit+0xfe/0x290 [] dev_queue_xmit+0x1e5/0xe00 [] ? dev_hard_start_xmit+0xa70/0xa70 [] ip_finish_output+0x3d0/0x890 [] ? ip_finish_output+0x135/0x890 [] ip_output+0x59/0xf0 [] ip_local_out+0x2d/0xa0 [] ip_queue_xmit+0x1c3/0x680 [] ? ip_local_out+0xa0/0xa0 [] tcp_transmit_skb+0x402/0xa60 [] ? tcp_md5_do_lookup+0x18e/0x1a0 [] tcp_write_xmit+0x1f4/0xa30 [] tcp_push_one+0x30/0x40 [] tcp_sendmsg+0xe82/0x1040 [] inet_sendmsg+0x125/0x230 [] ? inet_create+0x6b0/0x6b0 [] ? sock_update_classid+0xc2/0x3b0 [] ? sock_update_classid+0x130/0x3b0 [] sock_sendmsg+0xdc/0xf0 [] ? fget_light+0x3f9/0x4f0 [] sys_sendto+0xfe/0x130 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irq+0x30/0x50 [] ? finish_task_switch+0x83/0xf0 [] ? finish_task_switch+0x46/0xf0 [] ? sysret_check+0x1b/0x56 [] system_call_fastpath+0x16/0x1b Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ccac82e61604..ae3153c0db0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1322,6 +1322,8 @@ struct net_device { /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; + struct lock_class_key *qdisc_tx_busylock; + /* group the device belongs to */ int group; -- cgit v1.2.3 From b004ff4972e2a42aa4512c90cc6a9e4dc1bb36b6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 20:12:19 -0700 Subject: netdev_printk/dynamic_netdev_dbg: Directly call printk_emit A lot of stack is used in recursive printks with %pV. 
Using multiple levels of %pV (a logging function with %pV that calls another logging function with %pV) can consume more stack than necessary. Avoid excessive stack use by not calling dev_printk from netdev_printk and dynamic_netdev_dbg. Duplicate the logic and form of dev_printk instead. Make __netdev_printk static. Remove EXPORT_SYMBOL(__netdev_printk) Whitespace and brace style neatening. Signed-off-by: Joe Perches Acked-by: David S. Miller Tested-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59dc05f38247..5f49cc0a107e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2720,9 +2720,6 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } -extern int __netdev_printk(const char *level, const struct net_device *dev, - struct va_format *vaf); - extern __printf(3, 4) int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); -- cgit v1.2.3 From 6b6e27255f29a6191ef8ad96bfcc392ab2ef6c71 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 17 Sep 2012 10:03:26 +0000 Subject: netdev: make address const in device address management The internal functions for add/deleting addresses don't change their argument. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae3153c0db0a..82264e717e53 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -907,10 +907,10 @@ struct netdev_fcoe_hbainfo { * Must return >0 or -errno if it changed dev->features itself. 
* * int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev, - * unsigned char *addr, u16 flags) + * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, - * unsigned char *addr) + * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, int idx) @@ -1017,11 +1017,11 @@ struct net_device_ops { int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr); + const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -2561,9 +2561,9 @@ extern void __hw_addr_flush(struct netdev_hw_addr_list *list); extern void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ -extern int dev_addr_add(struct net_device *dev, unsigned char *addr, +extern int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -extern int dev_addr_del(struct net_device *dev, unsigned char *addr, +extern int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); extern int dev_addr_add_multiple(struct net_device *to_dev, struct net_device *from_dev, @@ -2575,20 +2575,20 @@ extern void dev_addr_flush(struct net_device *dev); extern int dev_addr_init(struct net_device *dev); /* Functions used for unicast addresses handling */ -extern int dev_uc_add(struct net_device *dev, unsigned char *addr); -extern int dev_uc_add_excl(struct net_device *dev, unsigned char *addr); -extern int dev_uc_del(struct net_device *dev, unsigned char *addr); +extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); +extern int dev_uc_add_excl(struct net_device *dev, 
const unsigned char *addr); +extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); extern int dev_uc_sync(struct net_device *to, struct net_device *from); extern void dev_uc_unsync(struct net_device *to, struct net_device *from); extern void dev_uc_flush(struct net_device *dev); extern void dev_uc_init(struct net_device *dev); /* Functions used for multicast addresses handling */ -extern int dev_mc_add(struct net_device *dev, unsigned char *addr); -extern int dev_mc_add_global(struct net_device *dev, unsigned char *addr); -extern int dev_mc_add_excl(struct net_device *dev, unsigned char *addr); -extern int dev_mc_del(struct net_device *dev, unsigned char *addr); -extern int dev_mc_del_global(struct net_device *dev, unsigned char *addr); +extern int dev_mc_add(struct net_device *dev, const unsigned char *addr); +extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); +extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); +extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); +extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); extern int dev_mc_sync(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_flush(struct net_device *dev); -- cgit v1.2.3 From 8c4c49df5cfeb8d56e5b85a430c8cbcb86c2ac37 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 17 Sep 2012 20:16:31 +0000 Subject: netpoll: call ->ndo_select_queue() in tx path In netpoll tx path, we miss the chance of calling ->ndo_select_queue(), thus could cause problems when bonding is involved. This patch makes dev_pick_tx() extern (and rename it to netdev_pick_tx()) to let netpoll call it in netpoll_send_skb_on_dev(). Reported-by: Sylvain Munaut Cc: "David S. Miller" Cc: Eric Dumazet Signed-off-by: Cong Wang Tested-by: Sylvain Munaut Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 82264e717e53..6c131f055ab0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1403,6 +1403,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb); + /* * Net namespace inlines */ -- cgit v1.2.3 From 404f7c9e118e0c92902afe1853d35f5638fe4a4c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Sep 2012 07:07:47 +0000 Subject: net: struct napi_struct fields reordering Remove two holes on 64bit arches, and put dev_list at the end of napi_struct since its not used in fast path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c131f055ab0..dd320bb22a5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -338,18 +338,16 @@ struct napi_struct { unsigned long state; int weight; + unsigned int gro_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL spinlock_t poll_lock; int poll_owner; #endif - - unsigned int gro_count; - struct net_device *dev; - struct list_head dev_list; struct sk_buff *gro_list; struct sk_buff *skb; + struct list_head dev_list; }; enum { -- cgit v1.2.3 From edc7d57327bd08bfd04f41531d49b176369db218 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Oct 2012 12:32:33 +0000 Subject: netlink: add attributes to fdb interface Later changes need to be able to refer to neighbour attributes when doing fdb_add. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd320bb22a5a..807a610f193c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -904,7 +904,8 @@ struct netdev_fcoe_hbainfo { * feature set might be less than what was returned by ndo_fix_features()). * Must return >0 or -errno if it changed dev->features itself. * - * int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, @@ -1014,6 +1015,7 @@ struct net_device_ops { void (*ndo_neigh_destroy)(struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 flags); -- cgit v1.2.3