diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 3 | ||||
-rw-r--r-- | net/core/dev.c | 525 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 206 | ||||
-rw-r--r-- | net/core/ethtool.c | 77 | ||||
-rw-r--r-- | net/core/flow.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 5 | ||||
-rw-r--r-- | net/core/netpoll.c | 8 | ||||
-rw-r--r-- | net/core/pktgen.c | 363 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 472 | ||||
-rw-r--r-- | net/core/scm.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 17 | ||||
-rw-r--r-- | net/core/sock.c | 180 | ||||
-rw-r--r-- | net/core/utils.c | 1 |
13 files changed, 1385 insertions, 477 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index cb056f476126..029b93e246b4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, __wsum csum; int chunk = skb->len - hlen; + if (!chunk) + return 0; + /* Skip filled elements. * Pretty silly, look at memcpy_toiovec, though 8) */ diff --git a/net/core/dev.c b/net/core/dev.c index 1561f61e4209..a76021c71207 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> +#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -151,9 +152,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -803,7 +817,9 @@ int dev_alloc_name(struct net_device *dev, const char *name) */ int dev_change_name(struct net_device *dev, char *newname) { + char oldname[IFNAMSIZ]; int err = 0; + int ret; ASSERT_RTNL(); @@ -813,6 +829,8 @@ int dev_change_name(struct net_device *dev, char *newname) if (!dev_valid_name(newname)) return -EINVAL; + memcpy(oldname, dev->name, IFNAMSIZ); + if (strchr(newname, '%')) { err = dev_alloc_name(dev, newname); if (err < 0) @@ -824,10 +842,28 @@ int dev_change_name(struct net_device *dev, char *newname) else strlcpy(dev->name, newname, IFNAMSIZ); +rollback: device_rename(&dev->dev, dev->name); + + write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); - raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + write_unlock_bh(&dev_base_lock); + + ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + ret = notifier_to_errno(ret); + + if (ret) { + if (err) { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); + } else { + err = ret; + memcpy(dev->name, oldname, IFNAMSIZ); + goto rollback; + } + } return err; } @@ -942,7 +978,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -1040,20 +1076,43 @@ int dev_close(struct net_device *dev) int register_netdevice_notifier(struct notifier_block *nb) { struct net_device *dev; + struct net_device *last; int err; rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); - if (!err) { - for_each_netdev(dev) { - nb->notifier_call(nb, NETDEV_REGISTER, dev); + if (err) + goto unlock; - if (dev->flags & IFF_UP) - nb->notifier_call(nb, NETDEV_UP, dev); - } + for_each_netdev(dev) { + err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + goto rollback; + + if (!(dev->flags & IFF_UP)) + continue; + + nb->notifier_call(nb, NETDEV_UP, dev); } + +unlock: rtnl_unlock(); return err; + +rollback: + last = dev; + for_each_netdev(dev) { + if (dev == last) + break; + + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + } + goto unlock; } /** @@ -1429,7 +1488,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1510,8 +1571,10 @@ int dev_queue_xmit(struct sk_buff *skb) skb_headroom(skb)); if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -1545,6 +1608,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1574,7 +1639,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1793,6 +1859,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -1900,6 +1988,9 @@ ncls: skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { @@ -2015,12 +2106,13 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending(chan); + } } #endif return; @@ -2496,26 +2588,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2525,10 +2608,32 @@ void dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } /** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + +/** * dev_set_allmulti - update allmulti count on a device * @dev: device * @inc: modifier @@ -2544,11 +2649,194 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) - dev_mc_upload(dev); + if (dev->flags ^ old_flags) { + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_set_rx_mode(dev); + } +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); +} + +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + (*count)--; + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + (*count)++; + return 0; +} + +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * @addr: address to delete + * @alen: length of @addr + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drops to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * @addr: address to delete + * @alen: length of @addr + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + +static void dev_addr_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + + netif_tx_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -2580,6 +2868,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2594,7 +2884,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2607,7 +2900,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3089,7 +3382,7 @@ int register_netdevice(struct net_device *dev) if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out; + goto err_uninit; } dev->ifindex = dev_new_index(); @@ -3103,10 +3396,26 @@ int register_netdevice(struct net_device *dev) = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out; + goto err_uninit; } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { @@ -3147,7 +3456,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_sysfs(dev); if (ret) - goto out; + goto err_uninit; dev->reg_state = NETREG_REGISTERED; /* @@ -3166,12 +3475,18 @@ int register_netdevice(struct net_device *dev) write_unlock_bh(&dev_base_lock); /* Notify protocols, that a new device appeared. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - - ret = 0; + ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + ret = notifier_to_errno(ret); + if (ret) + unregister_netdevice(dev); out: return ret; + +err_uninit: + if (dev->uninit) + dev->uninit(dev); + goto out; } /** @@ -3343,16 +3658,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. */ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3361,7 +3678,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * (queue_count - 1))) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3374,15 +3693,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + (queue_count - 1)) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device @@ -3471,9 +3797,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); @@ -3559,16 +3885,19 @@ static int dev_cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_NET_DMA /** - * net_dma_rebalance - - * This is called when the number of channels allocated to the net_dma_client - * changes. The net_dma_client tries to have one DMA channel per CPU. + * net_dma_rebalance - try to maintain one DMA channel per CPU + * @net_dma: DMA client and associated data (lock, channels, channel_mask) + * + * This is called when the number of channels allocated to the net_dma client + * changes. The net_dma client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3577,10 +3906,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3589,32 +3920,61 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** * netdev_dma_event - event callback for the net_dma_client * @client: should always be net_dma_client * @chan: DMA channel for the event - * @event: event type + * @state: DMA state to be handled */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) { - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3622,12 +3982,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } @@ -3650,9 +4008,10 @@ int netdev_compute_features(unsigned long all, unsigned long one) if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - /* if device can't do all checksum, downgrade to ipv4 */ + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; + all ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; if (one & NETIF_F_GSO) one |= NETIF_F_GSO_SOFTWARE; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5a54053386c8..20330c572610 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -64,85 +64,24 @@ */ /* - * Update the multicast list into the physical NIC controller. - */ - -static void __dev_mc_upload(struct net_device *dev) -{ - /* Don't do anything till we up the interface - * [dev_open will call this function so the list will - * stay sane] - */ - - if (!(dev->flags&IFF_UP)) - return; - - /* - * Devices with no set multicast or which have been - * detached don't get set. - */ - - if (dev->set_multicast_list == NULL || - !netif_device_present(dev)) - return; - - dev->set_multicast_list(dev); -} - -void dev_mc_upload(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); -} - -/* * Delete a device level multicast */ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, **dmip; + int err; netif_tx_lock_bh(dev); - - for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, + addr, alen, glbl); + if (!err) { /* - * Find the entry we want to delete. The device could - * have variable length entries so check these too. + * We have altered the list, so the card + * loaded filter is now wrong. Fix it */ - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - alen == dmi->dmi_addrlen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 0; - if (old_glbl == 0) - break; - } - if (--dmi->dmi_users) - goto done; - - /* - * Last user. So delete the entry. - */ - *dmip = dmi->next; - dev->mc_count--; - - kfree(dmi); - - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - __dev_mc_upload(dev); - - netif_tx_unlock_bh(dev); - return 0; - } + + __dev_set_rx_mode(dev); } - err = -ENOENT; -done: netif_tx_unlock_bh(dev); return err; } @@ -153,68 +92,96 @@ done: int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, *dmi1; - - dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + int err; netif_tx_lock_bh(dev); - for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - dmi->dmi_addrlen == alen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 1; - if (old_glbl) - goto done; - } - dmi->dmi_users++; - goto done; - } - } - - if ((dmi = dmi1) == NULL) { - netif_tx_unlock_bh(dev); - return -ENOMEM; - } - memcpy(dmi->dmi_addr, addr, alen); - dmi->dmi_addrlen = alen; - dmi->next = dev->mc_list; - dmi->dmi_users = 1; - dmi->dmi_gusers = glbl ? 1 : 0; - dev->mc_list = dmi; - dev->mc_count++; + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} - __dev_mc_upload(dev); +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. + */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da, *next; + int err = 0; - netif_tx_unlock_bh(dev); - return 0; + netif_tx_lock_bh(to); + da = from->mc_list; + while (da != NULL) { + next = da->next; + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + da = next; + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); -done: - netif_tx_unlock_bh(dev); - kfree(dmi1); return err; } +EXPORT_SYMBOL(dev_mc_sync); -/* - * Discard multicast list when a device is downed - */ -void dev_mc_discard(struct net_device *dev) +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(dev); - - while (dev->mc_list != NULL) { - struct dev_mc_list *tmp = dev->mc_list; - dev->mc_list = tmp->next; - if (tmp->dmi_users > tmp->dmi_gusers) - printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); - kfree(tmp); + struct dev_addr_list *da, *next; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + da = from->mc_list; + while (da != NULL) { + next = da->next; + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + da = next; } - dev->mc_count = 0; + __dev_set_rx_mode(to); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); } +EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) @@ -244,7 +211,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v) static int dev_mc_seq_show(struct seq_file *seq, void *v) { - struct dev_mc_list *m; + struct dev_addr_list *m; struct net_device *dev = v; netif_tx_lock_bh(dev); @@ -292,4 +259,3 @@ void __init dev_mcast_init(void) EXPORT_SYMBOL(dev_mc_add); EXPORT_SYMBOL(dev_mc_delete); -EXPORT_SYMBOL(dev_mc_upload); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a09b576..c5e059352d43 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3,10 +3,12 @@ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> * * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. + * the information ethtool needs. * - * It's GPL, stupid. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. */ #include <linux/module.h> @@ -52,6 +54,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -82,18 +95,6 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } -int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) -{ - unsigned char len = dev->addr_len; - if ( addr->size < len ) - return -ETOOSMALL; - - addr->size = len; - memcpy(data, dev->perm_addr, len); - return 0; -} - - u32 ethtool_op_get_ufo(struct net_device *dev) { return (dev->features & NETIF_F_UFO) != 0; @@ -766,34 +767,20 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; - u8 *data; - int ret; - - if (!dev->ethtool_ops->get_perm_addr) - return -EOPNOTSUPP; - if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + if (copy_from_user(&epaddr, useraddr, sizeof(epaddr))) return -EFAULT; - data = kmalloc(epaddr.size, GFP_USER); - if (!data) - return -ENOMEM; - - ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); - if (ret) - return ret; + if (epaddr.size < dev->addr_len) + return -ETOOSMALL; + epaddr.size = dev->addr_len; - ret = -EFAULT; if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) - goto out; + return -EFAULT; useraddr += sizeof(epaddr); - if (copy_to_user(useraddr, data, epaddr.size)) - goto out; - ret = 0; - - out: - kfree(data); - return ret; + if (copy_to_user(useraddr, dev->perm_addr, epaddr.size)) + return -EFAULT; + return 0; } /* The main entry point in this file. Called from net/core/dev.c */ @@ -810,7 +797,7 @@ int dev_ethtool(struct ifreq *ifr) return -ENODEV; if (!dev->ethtool_ops) - goto ioctl; + return -EOPNOTSUPP; if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; @@ -949,7 +936,7 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_gso(dev, useraddr); break; default: - rc = -EOPNOTSUPP; + rc = -EOPNOTSUPP; } if (dev->ethtool_ops->complete) @@ -959,20 +946,9 @@ int dev_ethtool(struct ifreq *ifr) netdev_features_change(dev); return rc; - - ioctl: - /* Keep existing behaviour for the moment. */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (dev->do_ioctl) - return dev->do_ioctl(dev, ifr, SIOCETHTOOL); - return -EOPNOTSUPP; } -EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); -EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); @@ -980,5 +956,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/flow.c b/net/core/flow.c index 051430545a05..0ab5234b17d8 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -350,7 +350,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9df26a07f067..f7de8f24d8dd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -33,6 +33,7 @@ #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> +#include <linux/log2.h> #define NEIGH_DEBUG 1 @@ -311,7 +312,7 @@ static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(new_entries & (new_entries - 1)); + BUG_ON(!is_power_of_2(new_entries)); new_hash = neigh_hash_alloc(new_entries); if (!new_hash) return; @@ -1347,7 +1348,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5df8cf425b41..de1b26aa5720 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); - if (netif_queue_stopped(dev) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + if ((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); local_irq_restore(flags); @@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev)) + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9cd3a1cb60ef..803d0c8826af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -111,6 +111,9 @@ * * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> * + * Fixed src_mac command to set source mac of packet to value specified in + * command by Adit Ranadive <adit.262@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -152,6 +155,9 @@ #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> +#ifdef CONFIG_XFRM +#include <net/xfrm.h> +#endif #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <asm/bitops.h> @@ -181,6 +187,8 @@ #define F_MPLS_RND (1<<8) /* Random MPLS labels */ #define F_VID_RND (1<<9) /* Random VLAN ID */ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ +#define F_FLOW_SEQ (1<<11) /* Sequential flows */ +#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -207,8 +215,15 @@ static struct proc_dir_entry *pg_proc_dir = NULL; struct flow_state { __be32 cur_daddr; int count; +#ifdef CONFIG_XFRM + struct xfrm_state *x; +#endif + __u32 flags; }; +/* flow flag bits */ +#define F_INIT (1<<0) /* flow has been initialized */ + struct pktgen_dev { /* * Try to keep frequent/infrequent used vars. separated. @@ -228,6 +243,7 @@ struct pktgen_dev { int min_pkt_size; /* = ETH_ZLEN; */ int max_pkt_size; /* = ETH_ZLEN; */ + int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; __u32 delay_us; /* Default delay */ __u32 delay_ns; @@ -341,7 +357,11 @@ struct pktgen_dev { unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ - + unsigned curfl; /* current sequenced flow (state)*/ +#ifdef CONFIG_XFRM + __u8 ipsmode; /* IPSEC mode (config) */ + __u8 ipsproto; /* IPSEC type (config) */ +#endif char result[512]; }; @@ -363,7 +383,6 @@ struct pktgen_thread { /* Field for thread to receive "posted" events terminate, stop ifs etc. */ u32 control; - int pid; int cpu; wait_queue_head_t queue; @@ -550,7 +569,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_run_all_threads(); else - printk("pktgen: Unknown command: %s\n", data); + printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); err = count; @@ -690,6 +709,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->cflows) { + if (pkt_dev->flags & F_FLOW_SEQ) + seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + else + seq_printf(seq, "FLOW_RND "); + } + +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + seq_printf(seq, "IPSEC "); +#endif + if (pkt_dev->flags & F_MACSRC_RND) seq_printf(seq, "MACSRC_RND "); @@ -879,14 +910,14 @@ static ssize_t pktgen_if_write(struct file *file, pg_result = &(pkt_dev->result[0]); if (count < 1) { - printk("pktgen: wrong command format\n"); + printk(KERN_WARNING "pktgen: wrong command format\n"); return -EINVAL; } max = count - i; tmp = count_trail_chars(&user_buffer[i], max); if (tmp < 0) { - printk("pktgen: illegal format\n"); + printk(KERN_WARNING "pktgen: illegal format\n"); return tmp; } i += tmp; @@ -914,7 +945,7 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, + printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, (unsigned long)count, tb); } @@ -1181,6 +1212,14 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!SVID_RND") == 0) pkt_dev->flags &= ~F_SVID_RND; + else if (strcmp(f, "FLOW_SEQ") == 0) + pkt_dev->flags |= F_FLOW_SEQ; + +#ifdef CONFIG_XFRM + else if (strcmp(f, "IPSEC") == 0) + pkt_dev->flags |= F_IPSEC_ON; +#endif + else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; @@ -1189,7 +1228,7 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -1211,7 +1250,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_min; } if (debug) - printk("pktgen: dst_min set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_min set to: %s\n", pkt_dev->dst_min); i += len; sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); @@ -1234,7 +1273,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_max; } if (debug) - printk("pktgen: dst_max set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_max set to: %s\n", pkt_dev->dst_max); i += len; sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); @@ -1257,7 +1296,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); if (debug) - printk("pktgen: dst6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6=%s", buf); @@ -1280,7 +1319,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); if (debug) - printk("pktgen: dst6_min set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); @@ -1301,7 +1340,7 @@ static ssize_t pktgen_if_write(struct file *file, fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); if (debug) - printk("pktgen: dst6_max set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); @@ -1324,7 +1363,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); if (debug) - printk("pktgen: src6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: src6=%s", buf); @@ -1345,7 +1384,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_min; } if (debug) - printk("pktgen: src_min set to: %s\n", + printk(KERN_DEBUG "pktgen: src_min set to: %s\n", pkt_dev->src_min); i += len; sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); @@ -1366,7 +1405,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_max; } if (debug) - printk("pktgen: src_max set to: %s\n", + printk(KERN_DEBUG "pktgen: src_max set to: %s\n", pkt_dev->src_max); i += len; sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); @@ -1415,8 +1454,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac")) { char *v = valstr; + unsigned char old_smac[ETH_ALEN]; unsigned char *m = pkt_dev->src_mac; + memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); + len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { return len; @@ -1445,6 +1487,10 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Src MAC */ + if (compare_ether_addr(old_smac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } @@ -1496,7 +1542,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN auto turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n"); } return count; } @@ -1511,10 +1557,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->vlan_id = value; /* turn on VLAN */ if (debug) - printk("pktgen: VLAN turned on\n"); + printk(KERN_DEBUG "pktgen: VLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id); @@ -1523,7 +1569,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1568,10 +1614,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = value; /* turn on SVLAN */ if (debug) - printk("pktgen: SVLAN turned on\n"); + printk(KERN_DEBUG "pktgen: SVLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id); @@ -1580,7 +1626,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1740,10 +1786,11 @@ static ssize_t pktgen_thread_write(struct file *file, i += len; if (debug) - printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count); + printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n", + name, (unsigned long)count); if (!t) { - printk("pktgen: ERROR: No thread\n"); + printk(KERN_ERR "pktgen: ERROR: No thread\n"); ret = -EINVAL; goto out; } @@ -1854,8 +1901,8 @@ static void pktgen_mark_device(const char *ifname) mutex_lock(&pktgen_thread_lock); if (++i >= max_tries) { - printk("pktgen_mark_device: timed out after waiting " - "%d msec for device %s to be removed\n", + printk(KERN_ERR "pktgen_mark_device: timed out after " + "waiting %d msec for device %s to be removed\n", msec_per_try * i, ifname); break; } @@ -1925,15 +1972,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) odev = dev_get_by_name(ifname); if (!odev) { - printk("pktgen: no such netdevice: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk("pktgen: not an ethernet device: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk("pktgen: device is down: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; @@ -1950,7 +1997,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { if (!pkt_dev->odev) { - printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); + printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " + "setup_inject.\n"); sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); return; @@ -2012,7 +2060,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } rcu_read_unlock(); if (err) - printk("pktgen: ERROR: IPv6 link address not availble.\n"); + printk(KERN_ERR "pktgen: ERROR: IPv6 link " + "address not availble.\n"); } #endif } else { @@ -2075,6 +2124,69 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) pkt_dev->idle_acc += now - start; } +static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) +{ + pkt_dev->pkt_overhead = 0; + pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); + pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); +} + +static inline int f_seen(struct pktgen_dev *pkt_dev, int flow) +{ + + if (pkt_dev->flows[flow].flags & F_INIT) + return 1; + else + return 0; +} + +static inline int f_pick(struct pktgen_dev *pkt_dev) +{ + int flow = pkt_dev->curfl; + + if (pkt_dev->flags & F_FLOW_SEQ) { + if (pkt_dev->flows[flow].count >= pkt_dev->lflow) { + /* reset time */ + pkt_dev->flows[flow].count = 0; + pkt_dev->curfl += 1; + if (pkt_dev->curfl >= pkt_dev->cflows) + pkt_dev->curfl = 0; /*reset */ + } + } else { + flow = random32() % pkt_dev->cflows; + + if (pkt_dev->flows[flow].count > pkt_dev->lflow) + pkt_dev->flows[flow].count = 0; + } + + return pkt_dev->curfl; +} + + +#ifdef CONFIG_XFRM +/* If there was already an IPSEC SA, we keep it as is, else + * we go look for it ... +*/ +static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) +{ + struct xfrm_state *x = pkt_dev->flows[flow].x; + if (!x) { + /*slow path: we dont already have xfrm_state*/ + x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + if (x) { + pkt_dev->flows[flow].x = x; + set_pkt_overhead(pkt_dev); + pkt_dev->pkt_overhead+=x->props.header_len; + } + + } +} +#endif /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2084,12 +2196,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 imx; int flow = 0; - if (pkt_dev->cflows) { - flow = random32() % pkt_dev->cflows; - - if (pkt_dev->flows[flow].count > pkt_dev->lflow) - pkt_dev->flows[flow].count = 0; - } + if (pkt_dev->cflows) + flow = f_pick(pkt_dev); /* Deal with source MAC */ if (pkt_dev->src_mac_count > 1) { @@ -2205,7 +2313,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_saddr = htonl(t); } - if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { + if (pkt_dev->cflows && f_seen(pkt_dev, flow)) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { imn = ntohl(pkt_dev->daddr_min); @@ -2235,8 +2343,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } if (pkt_dev->cflows) { + pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + get_ipsec_sa(pkt_dev, flow); +#endif pkt_dev->nflows++; } } @@ -2277,6 +2390,93 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } + +#ifdef CONFIG_XFRM +static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) +{ + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int err = 0; + struct iphdr *iph; + + if (!x) + return 0; + /* XXX: we dont support tunnel mode for now until + * we resolve the dst issue */ + if (x->props.mode != XFRM_MODE_TRANSPORT) + return 0; + + spin_lock(&x->lock); + iph = ip_hdr(skb); + + err = x->mode->output(x, skb); + if (err) + goto error; + err = x->type->output(x, skb); + if (err) + goto error; + + x->curlft.bytes +=skb->len; + x->curlft.packets++; + spin_unlock(&x->lock); + +error: + spin_unlock(&x->lock); + return err; +} + +static inline void free_SAs(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->cflows) { + /* let go of the SAs if we have them */ + int i = 0; + for (; i < pkt_dev->nflows; i++){ + struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { + xfrm_state_put(x); + pkt_dev->flows[i].x = NULL; + } + } + } +} + +static inline int process_ipsec(struct pktgen_dev *pkt_dev, + struct sk_buff *skb, __be16 protocol) +{ + if (pkt_dev->flags & F_IPSEC_ON) { + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int nhead = 0; + if (x) { + int ret; + __u8 *eth; + nhead = x->props.header_len - skb_headroom(skb); + if (nhead >0) { + ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (ret < 0) { + printk(KERN_ERR "Error expanding " + "ipsec packet %d\n",ret); + return 0; + } + } + + /* ipsec is not expecting ll header */ + skb_pull(skb, ETH_HLEN); + ret = pktgen_output_ipsec(skb, pkt_dev); + if (ret) { + printk(KERN_ERR "Error creating ipsec " + "packet %d\n",ret); + kfree_skb(skb); + return 0; + } + /* restore ll */ + eth = (__u8 *) skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 12); + *(u16 *) & eth[12] = protocol; + } + } + return 1; +} +#endif + static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned i; @@ -2323,9 +2523,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2368,7 +2566,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2391,8 +2589,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2463,6 +2660,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_usec = htonl(timestamp.tv_usec); } +#ifdef CONFIG_XFRM + if (!process_ipsec(pkt_dev, skb, protocol)) + return NULL; +#endif + return skb; } @@ -2662,9 +2864,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2708,7 +2908,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); @@ -2738,8 +2938,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2857,6 +3056,7 @@ static void pktgen_run(struct pktgen_thread *t) pkt_dev->started_at = getCurUs(); pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ pkt_dev->next_tx_ns = 0; + set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); started++; @@ -2997,8 +3197,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - printk("pktgen: interface: %s is already stopped\n", - pkt_dev->odev->name); + printk(KERN_WARNING "pktgen: interface: %s is already " + "stopped\n", pkt_dev->odev->name); return -EINVAL; } @@ -3139,7 +3339,10 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if (netif_queue_stopped(odev) || need_resched()) { + if ((netif_queue_stopped(odev) || + (pkt_dev->skb && + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3154,7 +3357,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev)) { + if (netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3170,7 +3374,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { - printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n"); + printk(KERN_ERR "pktgen: ERROR: couldn't " + "allocate skb in fill_packet.\n"); schedule(); pkt_dev->clone_count--; /* back out increment, OOM */ goto out; @@ -3181,7 +3386,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev)) { + if (!netif_queue_stopped(odev) && + !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3266,14 +3472,14 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); - t->pid = current->pid; - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); max_before_softirq = t->max_before_softirq; set_current_state(TASK_INTERRUPTIBLE); + set_freezable(); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3372,7 +3578,8 @@ static int add_dev_to_thread(struct pktgen_thread *t, if_lock(t); if (pkt_dev->pg_thread) { - printk("pktgen: ERROR: already assigned to a thread.\n"); + printk(KERN_ERR "pktgen: ERROR: already assigned " + "to a thread.\n"); rv = -EBUSY; goto out; } @@ -3397,7 +3604,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev = __pktgen_NN_threads(ifname, FIND); if (pkt_dev) { - printk("pktgen: ERROR: interface already used.\n"); + printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); return -EBUSY; } @@ -3439,18 +3646,25 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir); if (!pkt_dev->entry) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; } pkt_dev->entry->proc_fops = &pktgen_if_fops; pkt_dev->entry->data = pkt_dev; +#ifdef CONFIG_XFRM + pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; + pkt_dev->ipsproto = IPPROTO_ESP; +#endif return add_dev_to_thread(t, pkt_dev); out2: dev_put(pkt_dev->odev); out1: +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3465,7 +3679,8 @@ static int __init pktgen_create_thread(int cpu) t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { - printk("pktgen: ERROR: out of memory, can't create new thread.\n"); + printk(KERN_ERR "pktgen: ERROR: out of memory, can't " + "create new thread.\n"); return -ENOMEM; } @@ -3478,7 +3693,8 @@ static int __init pktgen_create_thread(int cpu) p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + printk(KERN_ERR "pktgen: kernel_thread() failed " + "for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); @@ -3488,7 +3704,7 @@ static int __init pktgen_create_thread(int cpu) pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, t->tsk->comm); kthread_stop(p); list_del(&t->th_list); @@ -3527,7 +3743,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); + printk(KERN_WARNING "pktgen: WARNING: trying to remove a " + "running interface, stopping it now.\n"); pktgen_stop_device(pkt_dev); } @@ -3545,6 +3762,9 @@ static int pktgen_remove_device(struct pktgen_thread *t, if (pkt_dev->entry) remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3556,7 +3776,7 @@ static int __init pg_init(void) int cpu; struct proc_dir_entry *pe; - printk(version); + printk(KERN_INFO "%s", version); pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); if (!pg_proc_dir) @@ -3565,8 +3785,8 @@ static int __init pg_init(void) pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); if (pe == NULL) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", - PGCTRL); + printk(KERN_ERR "pktgen: ERROR: cannot create %s " + "procfs entry.\n", PGCTRL); proc_net_remove(PG_PROC_DIR); return -EINVAL; } @@ -3582,12 +3802,13 @@ static int __init pg_init(void) err = pktgen_create_thread(cpu); if (err) - printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", - cpu, err); + printk(KERN_WARNING "pktgen: WARNING: Cannot create " + "thread for cpu %d (%d)\n", cpu, err); } if (list_empty(&pktgen_threads)) { - printk("pktgen: ERROR: Initialization failed for all threads\n"); + printk(KERN_ERR "pktgen: ERROR: Initialization failed for " + "all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(PG_PROC_DIR); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02e8bf084277..4756d5857abf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -97,6 +97,19 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } +int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len) +{ + if (RTA_PAYLOAD(rta) < len) + return -1; + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return rtattr_parse_nested(tb, maxattr, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * maxattr); + return 0; +} + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) @@ -243,6 +256,150 @@ void rtnl_unregister_all(int protocol) EXPORT_SYMBOL_GPL(rtnl_unregister_all); +static LIST_HEAD(link_ops); + +/** + * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that create devices during module initialization. It + * must be called before registering the devices. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_link_register(struct rtnl_link_ops *ops) +{ + if (!ops->dellink) + ops->dellink = unregister_netdevice; + + list_add_tail(&ops->list, &link_ops); + return 0; +} + +EXPORT_SYMBOL_GPL(__rtnl_link_register); + +/** + * rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_link_register(struct rtnl_link_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_link_register(ops); + rtnl_unlock(); + return err; +} + +EXPORT_SYMBOL_GPL(rtnl_link_register); + +/** + * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + struct net_device *dev, *n; + + for_each_netdev_safe(dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } + list_del(&ops->list); +} + +EXPORT_SYMBOL_GPL(__rtnl_link_unregister); + +/** + * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + */ +void rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_link_unregister(ops); + rtnl_unlock(); +} + +EXPORT_SYMBOL_GPL(rtnl_link_unregister); + +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + +static size_t rtnl_link_get_size(const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + size_t size; + + if (!ops) + return 0; + + size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + + if (ops->get_size) + /* IFLA_INFO_DATA + nested data */ + size += nlmsg_total_size(sizeof(struct nlattr)) + + ops->get_size(dev); + + if (ops->get_xstats_size) + size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + + return size; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *linkinfo, *data; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) + goto err_cancel_link; + if (ops->fill_xstats) { + err = ops->fill_xstats(skb, dev); + if (err < 0) + goto err_cancel_link; + } + if (ops->fill_info) { + data = nla_nest_start(skb, IFLA_INFO_DATA); + if (data == NULL) + goto err_cancel_link; + err = ops->fill_info(skb, dev); + if (err < 0) + goto err_cancel_data; + nla_nest_end(skb, data); + } + + nla_nest_end(skb, linkinfo); + return 0; + +err_cancel_data: + nla_nest_cancel(skb, data); +err_cancel_link: + nla_nest_cancel(skb, linkinfo); +out: + return err; +} + static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), @@ -437,7 +594,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(void) +static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,7 +609,8 @@ static inline size_t if_nlmsg_size(void) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, @@ -522,6 +680,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } } + if (dev->rtnl_link_ops) { + if (rtnl_link_fill(skb, dev) < 0) + goto nla_put_failure; + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -553,6 +716,8 @@ cont: static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, @@ -561,44 +726,16 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, }; -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifinfomsg *ifm; - struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; +static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { + [IFLA_INFO_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_DATA] = { .type = NLA_NESTED }, +}; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; +static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + struct nlattr **tb, char *ifname, int modified) +{ + int send_addr_notify = 0; + int err; if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; @@ -606,12 +743,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } u_map = nla_data(tb[IFLA_MAP]); @@ -624,7 +761,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_config(dev, &k_map); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -635,19 +772,19 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), @@ -655,7 +792,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto errout; send_addr_notify = 1; modified = 1; } @@ -663,7 +800,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -675,7 +812,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -684,7 +821,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) send_addr_notify = 1; } - if (ifm->ifi_flags || ifm->ifi_change) { unsigned int flags = ifm->ifi_flags; @@ -712,7 +848,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = 0; -errout_dev: +errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " "some changes comitted already. Interface %s may " @@ -721,12 +857,241 @@ errout_dev: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ifinfomsg *ifm; + struct net_device *dev; + int err; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(ifname); + else + goto errout; + + if (dev == NULL) { + err = -ENODEV; + goto errout; + } + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + err = do_setlink(dev, ifm, tb, ifname, 0); +errout_dev: dev_put(dev); errout: return err; } +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(ifname); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + ops = dev->rtnl_link_ops; + if (!ops) + return -EOPNOTSUPP; + + ops->dellink(dev); + return 0; +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char kind[MODULE_NAME_LEN]; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + int err; + +#ifdef CONFIG_KMOD +replay: +#endif + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (ifname[0]) + dev = __dev_get_by_name(ifname); + else + dev = NULL; + + if (tb[IFLA_LINKINFO]) { + err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], ifla_info_policy); + if (err < 0) + return err; + } else + memset(linkinfo, 0, sizeof(linkinfo)); + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } else { + kind[0] = '\0'; + ops = NULL; + } + + if (1) { + struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + + if (ops) { + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + err = nla_parse_nested(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy); + if (err < 0) + return err; + data = attr; + } + if (ops->validate) { + err = ops->validate(tb, data); + if (err < 0) + return err; + } + } + + if (dev) { + int modified = 0; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || + !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data); + if (err < 0) + return err; + modified = 1; + } + + return do_setlink(dev, ifm, tb, ifname, modified); + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENODEV; + + if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + return -EOPNOTSUPP; + if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { +#ifdef CONFIG_KMOD + if (kind[0]) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + if (ops) + goto replay; + } +#endif + return -EOPNOTSUPP; + } + + if (!ifname[0]) + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + return -ENOMEM; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + if (ops->newlink) + err = ops->newlink(dev, tb, data); + else + err = register_netdevice(dev); +err_free: + if (err < 0) + free_netdev(dev); + return err; + } +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct ifinfomsg *ifm; @@ -747,7 +1112,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -797,7 +1162,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (skb == NULL) goto errout; @@ -952,6 +1317,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); @@ -960,6 +1327,7 @@ void __init rtnetlink_init(void) EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); +EXPORT_SYMBOL(__rtattr_parse_nested_compat); EXPORT_SYMBOL(rtnetlink_put_metrics); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); diff --git a/net/core/scm.c b/net/core/scm.c index 292ad8d5ad76..44c4ec2c8769 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -228,7 +228,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = security_file_receive(fp[i]); if (err) break; - err = get_unused_fd(); + err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags + ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3943c3ad9145..35021eb3ed07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -415,9 +415,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) C(csum); C(local_df); n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->nohdr = 0; C(pkt_type); C(ip_summed); + skb_copy_queue_mapping(n, skb); C(priority); #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); @@ -426,6 +428,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->destructor = NULL; C(mark); __nf_copy(n, skb); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + C(nf_trace); +#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -459,6 +465,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->sk = NULL; new->dev = old->dev; + skb_copy_queue_mapping(new, old); new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -482,6 +489,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->destructor = NULL; new->mark = old->mark; __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif @@ -676,6 +687,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; skb->mac_header += off; skb->cloned = 0; + skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; @@ -1930,6 +1942,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) tail = nskb; nskb->dev = skb->dev; + skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; nskb->dst = dst_clone(skb->dst); @@ -2008,13 +2021,13 @@ void __init skb_init(void) sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } /** diff --git a/net/core/sock.c b/net/core/sock.c index c14ce0198d25..190de61cd648 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -171,6 +171,20 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_MAX" }; +static const char *af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , + "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", + "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , + "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , + "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , + "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , + "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , + "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , + "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , + "clock-27" , "clock-28" , "clock-29" , + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , + "clock-AF_RXRPC" , "clock-AF_MAX" +}; #endif /* @@ -210,13 +224,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) return -EDOM; if (tv.tv_sec < 0) { - static int warned = 0; + static int warned __read_mostly; + *timeo_p = 0; if (warned < 10 && net_ratelimit()) warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", - current->comm, current->pid); + current->comm, current->pid); return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -347,6 +362,61 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); +static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + char devname[IFNAMSIZ]; + int index; + + /* Sorry... */ + ret = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; + + ret = -EINVAL; + if (optlen < 0) + goto out; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + memset(devname, 0, sizeof(devname)); + + ret = -EFAULT; + if (copy_from_user(devname, optval, optlen)) + goto out; + + if (devname[0] == '\0') { + index = 0; + } else { + struct net_device *dev = dev_get_by_name(devname); + + ret = -ENODEV; + if (!dev) + goto out; + + index = dev->ifindex; + dev_put(dev); + } + + lock_sock(sk); + sk->sk_bound_dev_if = index; + sk_dst_reset(sk); + release_sock(sk); + + ret = 0; + +out: +#endif + + return ret; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -375,6 +445,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif + if (optname == SO_BINDTODEVICE) + return sock_bindtodevice(sk, optval, optlen); + if (optlen < sizeof(int)) return -EINVAL; @@ -563,54 +636,6 @@ set_rcvbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); break; -#ifdef CONFIG_NETDEVICES - case SO_BINDTODEVICE: - { - char devname[IFNAMSIZ]; - - /* Sorry... */ - if (!capable(CAP_NET_RAW)) { - ret = -EPERM; - break; - } - - /* Bind this socket to a particular device like "eth0", - * as specified in the passed interface name. If the - * name is "" or the option length is zero the socket - * is not bound. - */ - - if (!valbool) { - sk->sk_bound_dev_if = 0; - } else { - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { - ret = -EFAULT; - break; - } - - /* Remove any cached route for this socket. */ - sk_dst_reset(sk); - - if (devname[0] == '\0') { - sk->sk_bound_dev_if = 0; - } else { - struct net_device *dev = dev_get_by_name(devname); - if (!dev) { - ret = -ENODEV; - break; - } - sk->sk_bound_dev_if = dev->ifindex; - dev_put(dev); - } - } - break; - } -#endif - - case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { @@ -940,8 +965,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family); + lockdep_set_class_and_name(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family, + af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1529,8 +1555,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); - lockdep_set_class(&sk->sk_callback_lock, - af_callback_keys + sk->sk_family); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1751,7 +1778,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -1769,7 +1796,7 @@ int proto_register(struct proto *prot, int alloc_slab) sprintf(request_sock_slab_name, mask, prot->name); prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", @@ -1791,7 +1818,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } @@ -1851,46 +1878,15 @@ void proto_unregister(struct proto *prot) EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS -static inline struct proto *__proto_head(void) -{ - return list_entry(proto_list.next, struct proto, node); -} - -static inline struct proto *proto_head(void) -{ - return list_empty(&proto_list) ? NULL : __proto_head(); -} - -static inline struct proto *proto_next(struct proto *proto) -{ - return proto->node.next == &proto_list ? NULL : - list_entry(proto->node.next, struct proto, node); -} - -static inline struct proto *proto_get_idx(loff_t pos) -{ - struct proto *proto; - loff_t i = 0; - - list_for_each_entry(proto, &proto_list, node) - if (i++ == pos) - goto out; - - proto = NULL; -out: - return proto; -} - static void *proto_seq_start(struct seq_file *seq, loff_t *pos) { read_lock(&proto_list_lock); - return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN; + return seq_list_start_head(&proto_list, *pos); } static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - return v == SEQ_START_TOKEN ? proto_head() : proto_next(v); + return seq_list_next(v, &proto_list, pos); } static void proto_seq_stop(struct seq_file *seq, void *v) @@ -1938,7 +1934,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) static int proto_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) + if (v == &proto_list) seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", "protocol", "size", @@ -1950,7 +1946,7 @@ static int proto_seq_show(struct seq_file *seq, void *v) "module", "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); else - proto_seq_printf(seq, v); + proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; } diff --git a/net/core/utils.c b/net/core/utils.c index 2030bb8c2d30..0bf17da40d52 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -25,6 +25,7 @@ #include <linux/random.h> #include <linux/percpu.h> #include <linux/init.h> +#include <net/sock.h> #include <asm/byteorder.h> #include <asm/system.h> |