1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/net_namespace.h>
96 #include <net/sock.h>
97 #include <linux/rtnetlink.h>
98 #include <linux/proc_fs.h>
99 #include <linux/seq_file.h>
100 #include <linux/stat.h>
101 #include <linux/if_bridge.h>
102 #include <linux/if_macvlan.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <linux/highmem.h>
107 #include <linux/init.h>
108 #include <linux/kmod.h>
109 #include <linux/module.h>
110 #include <linux/kallsyms.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122
123 #include "net-sysfs.h"
124
125 /*
126 * The list of packet types we will receive (as opposed to discard)
127 * and the routines to invoke.
128 *
129 * Why 16. Because with 16 the only overlap we get on a hash of the
130 * low nibble of the protocol value is RARP/SNAP/X.25.
131 *
132 * NOTE: That is no longer true with the addition of VLAN tags. Not
133 * sure which should go first, but I bet it won't make much
134 * difference if we are running VLANs. The good news is that
135 * this protocol won't be in the list unless compiled in, so
136 * the average user (w/out VLANs) will not be adversely affected.
137 * --BLG
138 *
139 * 0800 IP
140 * 8100 802.1Q VLAN
141 * 0001 802.3
142 * 0002 AX.25
143 * 0004 802.2
144 * 8035 RARP
145 * 0005 SNAP
146 * 0805 X.25
147 * 0806 ARP
148 * 8137 IPX
149 * 0009 Localtalk
150 * 86DD IPv6
151 */
152
/*
 * Number of buckets in ptype_base[]. A bucket is selected with
 * "ntohs(type) & PTYPE_HASH_MASK", which relies on the size being a
 * power of two.
 */
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

/* Taken (BH-safe) by dev_add_pack()/__dev_remove_pack() around list updates. */
static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for a specific protocol, hashed by protocol value. */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* Handlers registered for ETH_P_ALL. */
static struct list_head ptype_all __read_mostly; /* Taps */
159
#ifdef CONFIG_NET_DMA
/* Bookkeeping for the networking DMA client; only built with CONFIG_NET_DMA. */
struct net_dma {
	struct dma_client client;	/* carries the event callback set below */
	spinlock_t lock;		/* NOTE(review): presumably guards the two fields below - confirm */
	cpumask_t channel_mask;
	struct dma_chan *channels[NR_CPUS];	/* array sized by NR_CPUS */
};

/* Forward declaration: the handler is defined later in this file. */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

/* The single net_dma instance; its client reports events to netdev_dma_event(). */
static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif
178
179 /*
180 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
181 * semaphore.
182 *
183 * Pure readers hold dev_base_lock for reading.
184 *
185 * Writers must hold the rtnl semaphore while they loop through the
186 * dev_base_head list, and hold dev_base_lock for writing when they do the
187 * actual updates. This allows pure readers to access the list even
188 * while a writer is preparing to update it.
189 *
190 * To put it another way, dev_base_lock is held for writing only to
191 * protect against pure readers; the rtnl semaphore provides the
192 * protection against other writers.
193 *
194 * See, for example usages, register_netdevice() and
195 * unregister_netdevice(), which must be called with the rtnl
196 * semaphore held.
197 */
/* Reader/writer lock for the device lists; see the locking rules in the comment above. */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

/*
 * dev_name_hash() and dev_index_hash() keep the low NETDEV_HASHBITS bits
 * of the hash/ifindex to pick a bucket in the per-namespace tables.
 */
#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
204
205 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
206 {
207 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
208 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
209 }
210
211 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
212 {
213 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
214 }
215
216 /* Device list insertion */
217 static int list_netdevice(struct net_device *dev)
218 {
219 struct net *net = dev->nd_net;
220
221 ASSERT_RTNL();
222
223 write_lock_bh(&dev_base_lock);
224 list_add_tail(&dev->dev_list, &net->dev_base_head);
225 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
226 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
227 write_unlock_bh(&dev_base_lock);
228 return 0;
229 }
230
231 /* Device list removal */
232 static void unlist_netdevice(struct net_device *dev)
233 {
234 ASSERT_RTNL();
235
236 /* Unlink dev from the device chain */
237 write_lock_bh(&dev_base_lock);
238 list_del(&dev->dev_list);
239 hlist_del(&dev->name_hlist);
240 hlist_del(&dev->index_hlist);
241 write_unlock_bh(&dev_base_lock);
242 }
243
/*
 *	Our notifier list. register_netdevice_notifier() adds notifier
 *	blocks to this chain while holding the rtnl lock.
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler. One instance exists per CPU.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
256
257 #ifdef CONFIG_DEBUG_LOCK_ALLOC
258 /*
259 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
260 * according to dev->type
261 */
262 static const unsigned short netdev_lock_type[] =
263 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
264 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
265 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
266 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
267 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
268 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
269 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
270 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
271 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
272 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
273 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
274 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
275 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
276 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
277 ARPHRD_NONE};
278
279 static const char *netdev_lock_name[] =
280 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
281 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
282 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
283 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
284 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
285 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
286 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
287 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
288 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
289 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
290 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
291 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
292 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
293 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
294 "_xmit_NONE"};
295
296 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
297
298 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
299 {
300 int i;
301
302 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
303 if (netdev_lock_type[i] == dev_type)
304 return i;
305 /* the last key is used by default */
306 return ARRAY_SIZE(netdev_lock_type) - 1;
307 }
308
309 static inline void netdev_set_lockdep_class(spinlock_t *lock,
310 unsigned short dev_type)
311 {
312 int i;
313
314 i = netdev_lock_pos(dev_type);
315 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
316 netdev_lock_name[i]);
317 }
318 #else
319 static inline void netdev_set_lockdep_class(spinlock_t *lock,
320 unsigned short dev_type)
321 {
322 }
323 #endif
324
325 /*******************************************************************************
326
327 Protocol management and registration routines
328
329 *******************************************************************************/
330
331 /*
332 * Add a protocol ID to the list. Now that the input handler is
333 * smarter we can dispense with all the messy stuff that used to be
334 * here.
335 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the check of protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles packets were
 *	first on the list, it could not tell that the packet is cloned
 *	and should be copied-on-write, so it would change the packet in
 *	place and subsequent readers would get a broken packet.
 *							--ANK (980803)
345 */
346
347 /**
348 * dev_add_pack - add packet handler
349 * @pt: packet type declaration
350 *
351 * Add a protocol handler to the networking stack. The passed &packet_type
352 * is linked into kernel lists and may not be freed until it has been
353 * removed from the kernel lists.
354 *
355 * This call does not sleep therefore it can not
356 * guarantee all CPU's that are in middle of receiving packets
357 * will see the new packet type (until the next received packet).
358 */
359
360 void dev_add_pack(struct packet_type *pt)
361 {
362 int hash;
363
364 spin_lock_bh(&ptype_lock);
365 if (pt->type == htons(ETH_P_ALL))
366 list_add_rcu(&pt->list, &ptype_all);
367 else {
368 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
369 list_add_rcu(&pt->list, &ptype_base[hash]);
370 }
371 spin_unlock_bh(&ptype_lock);
372 }
373
374 /**
375 * __dev_remove_pack - remove packet handler
376 * @pt: packet type declaration
377 *
378 * Remove a protocol handler that was previously added to the kernel
379 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
380 * from the kernel lists and can be freed or reused once this function
381 * returns.
382 *
383 * The packet type might still be in use by receivers
384 * and must not be freed until after all the CPU's have gone
385 * through a quiescent state.
386 */
387 void __dev_remove_pack(struct packet_type *pt)
388 {
389 struct list_head *head;
390 struct packet_type *pt1;
391
392 spin_lock_bh(&ptype_lock);
393
394 if (pt->type == htons(ETH_P_ALL))
395 head = &ptype_all;
396 else
397 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
398
399 list_for_each_entry(pt1, head, list) {
400 if (pt == pt1) {
401 list_del_rcu(&pt->list);
402 goto out;
403 }
404 }
405
406 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
407 out:
408 spin_unlock_bh(&ptype_lock);
409 }
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously added with
 *	dev_add_pack() and then wait in synchronize_net(). Once this
 *	function returns, the &packet_type can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then wait until no receiver can still be using it. */
	synchronize_net();
}
428
429 /******************************************************************************
430
431 Device Boot-time Settings Routines
432
433 *******************************************************************************/
434
/*
 * Boot time configuration table. Entries are added by
 * netdev_boot_setup_add() (reached from the "netdev=" kernel parameter);
 * a slot whose name starts with '\0' or ' ' is treated as free.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
437
438 /**
439 * netdev_boot_setup_add - add new setup entry
440 * @name: name of the device
441 * @map: configured settings for the device
442 *
443 * Adds new setup entry to the dev_boot_setup list. The function
444 * returns 0 on error and 1 on success. This is a generic routine to
445 * all netdevices.
446 */
447 static int netdev_boot_setup_add(char *name, struct ifmap *map)
448 {
449 struct netdev_boot_setup *s;
450 int i;
451
452 s = dev_boot_setup;
453 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
454 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
455 memset(s[i].name, 0, sizeof(s[i].name));
456 strcpy(s[i].name, name);
457 memcpy(&s[i].map, map, sizeof(s[i].map));
458 break;
459 }
460 }
461
462 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
463 }
464
465 /**
466 * netdev_boot_setup_check - check boot time settings
467 * @dev: the netdevice
468 *
469 * Check boot time settings for the device.
470 * The found settings are set for the device to be used
471 * later in the device probing.
472 * Returns 0 if no settings found, 1 if they are.
473 */
474 int netdev_boot_setup_check(struct net_device *dev)
475 {
476 struct netdev_boot_setup *s = dev_boot_setup;
477 int i;
478
479 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
480 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
481 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
482 dev->irq = s[i].map.irq;
483 dev->base_addr = s[i].map.base_addr;
484 dev->mem_start = s[i].map.mem_start;
485 dev->mem_end = s[i].map.mem_end;
486 return 1;
487 }
488 }
489 return 0;
490 }
491
492
493 /**
494 * netdev_boot_base - get address from boot time settings
495 * @prefix: prefix for network device
496 * @unit: id for network device
497 *
498 * Check boot time settings for the base address of device.
499 * The found settings are set for the device to be used
500 * later in the device probing.
501 * Returns 0 if no settings found.
502 */
503 unsigned long netdev_boot_base(const char *prefix, int unit)
504 {
505 const struct netdev_boot_setup *s = dev_boot_setup;
506 char name[IFNAMSIZ];
507 int i;
508
509 sprintf(name, "%s%d", prefix, unit);
510
511 /*
512 * If device already registered then return base of 1
513 * to indicate not to probe for this interface
514 */
515 if (__dev_get_by_name(&init_net, name))
516 return 1;
517
518 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
519 if (!strcmp(name, s[i].name))
520 return s[i].map.base_addr;
521 return 0;
522 }
523
524 /*
525 * Saves at boot time configured settings for any netdevice.
526 */
527 int __init netdev_boot_setup(char *str)
528 {
529 int ints[5];
530 struct ifmap map;
531
532 str = get_options(str, ARRAY_SIZE(ints), ints);
533 if (!str || !*str)
534 return 0;
535
536 /* Save settings */
537 memset(&map, 0, sizeof(map));
538 if (ints[0] > 0)
539 map.irq = ints[1];
540 if (ints[0] > 1)
541 map.base_addr = ints[2];
542 if (ints[0] > 2)
543 map.mem_start = ints[3];
544 if (ints[0] > 3)
545 map.mem_end = ints[4];
546
547 /* Add new entry to the list */
548 return netdev_boot_setup_add(str, &map);
549 }
550
551 __setup("netdev=", netdev_boot_setup);
552
553 /*******************************************************************************
554
555 Device Interface Subroutines
556
557 *******************************************************************************/
558
559 /**
560 * __dev_get_by_name - find a device by its name
561 * @net: the applicable net namespace
562 * @name: name to find
563 *
564 * Find an interface by name. Must be called under RTNL semaphore
565 * or @dev_base_lock. If the name is found a pointer to the device
566 * is returned. If the name is not found then %NULL is returned. The
567 * reference counters are not incremented so the caller must be
568 * careful with locks.
569 */
570
571 struct net_device *__dev_get_by_name(struct net *net, const char *name)
572 {
573 struct hlist_node *p;
574
575 hlist_for_each(p, dev_name_hash(net, name)) {
576 struct net_device *dev
577 = hlist_entry(p, struct net_device, name_hlist);
578 if (!strncmp(dev->name, name, IFNAMSIZ))
579 return dev;
580 }
581 return NULL;
582 }
583
584 /**
585 * dev_get_by_name - find a device by its name
586 * @net: the applicable net namespace
587 * @name: name to find
588 *
589 * Find an interface by name. This can be called from any
590 * context and does its own locking. The returned handle has
591 * the usage count incremented and the caller must use dev_put() to
592 * release it when it is no longer needed. %NULL is returned if no
593 * matching device is found.
594 */
595
596 struct net_device *dev_get_by_name(struct net *net, const char *name)
597 {
598 struct net_device *dev;
599
600 read_lock(&dev_base_lock);
601 dev = __dev_get_by_name(net, name);
602 if (dev)
603 dev_hold(dev);
604 read_unlock(&dev_base_lock);
605 return dev;
606 }
607
608 /**
609 * __dev_get_by_index - find a device by its ifindex
610 * @net: the applicable net namespace
611 * @ifindex: index of device
612 *
613 * Search for an interface by index. Returns %NULL if the device
614 * is not found or a pointer to the device. The device has not
615 * had its reference counter increased so the caller must be careful
616 * about locking. The caller must hold either the RTNL semaphore
617 * or @dev_base_lock.
618 */
619
620 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
621 {
622 struct hlist_node *p;
623
624 hlist_for_each(p, dev_index_hash(net, ifindex)) {
625 struct net_device *dev
626 = hlist_entry(p, struct net_device, index_hlist);
627 if (dev->ifindex == ifindex)
628 return dev;
629 }
630 return NULL;
631 }
632
633
634 /**
635 * dev_get_by_index - find a device by its ifindex
636 * @net: the applicable net namespace
637 * @ifindex: index of device
638 *
639 * Search for an interface by index. Returns NULL if the device
640 * is not found or a pointer to the device. The device returned has
641 * had a reference added and the pointer is safe until the user calls
642 * dev_put to indicate they have finished with it.
643 */
644
645 struct net_device *dev_get_by_index(struct net *net, int ifindex)
646 {
647 struct net_device *dev;
648
649 read_lock(&dev_base_lock);
650 dev = __dev_get_by_index(net, ifindex);
651 if (dev)
652 dev_hold(dev);
653 read_unlock(&dev_base_lock);
654 return dev;
655 }
656
657 /**
658 * dev_getbyhwaddr - find a device by its hardware address
659 * @net: the applicable net namespace
660 * @type: media type of device
661 * @ha: hardware address
662 *
663 * Search for an interface by MAC address. Returns NULL if the device
664 * is not found or a pointer to the device. The caller must hold the
665 * rtnl semaphore. The returned device has not had its ref count increased
666 * and the caller must therefore be careful about locking
667 *
668 * BUGS:
669 * If the API was consistent this would be __dev_get_by_hwaddr
670 */
671
672 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
673 {
674 struct net_device *dev;
675
676 ASSERT_RTNL();
677
678 for_each_netdev(net, dev)
679 if (dev->type == type &&
680 !memcmp(dev->dev_addr, ha, dev->addr_len))
681 return dev;
682
683 return NULL;
684 }
685
686 EXPORT_SYMBOL(dev_getbyhwaddr);
687
688 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
689 {
690 struct net_device *dev;
691
692 ASSERT_RTNL();
693 for_each_netdev(net, dev)
694 if (dev->type == type)
695 return dev;
696
697 return NULL;
698 }
699
700 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
701
702 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
703 {
704 struct net_device *dev;
705
706 rtnl_lock();
707 dev = __dev_getfirstbyhwtype(net, type);
708 if (dev)
709 dev_hold(dev);
710 rtnl_unlock();
711 return dev;
712 }
713
714 EXPORT_SYMBOL(dev_getfirstbyhwtype);
715
716 /**
717 * dev_get_by_flags - find any device with given flags
718 * @net: the applicable net namespace
719 * @if_flags: IFF_* values
720 * @mask: bitmask of bits in if_flags to check
721 *
722 * Search for any interface with the given flags. Returns NULL if a device
723 * is not found or a pointer to the device. The device returned has
724 * had a reference added and the pointer is safe until the user calls
725 * dev_put to indicate they have finished with it.
726 */
727
728 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
729 {
730 struct net_device *dev, *ret;
731
732 ret = NULL;
733 read_lock(&dev_base_lock);
734 for_each_netdev(net, dev) {
735 if (((dev->flags ^ if_flags) & mask) == 0) {
736 dev_hold(dev);
737 ret = dev;
738 break;
739 }
740 }
741 read_unlock(&dev_base_lock);
742 return ret;
743 }
744
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work. We also disallow any kind of
 *	whitespace.
 *
 *	A name is rejected when it is empty, is "." or "..", has
 *	IFNAMSIZ or more characters, or contains '/' or whitespace.
 *	Returns 1 if the name is acceptable and 0 otherwise.
 *
 *	The string is examined in a single pass that stops as soon as
 *	the length limit is reached, so at most IFNAMSIZ bytes of @name
 *	are ever read.
 */
int dev_valid_name(const char *name)
{
	size_t len;

	if (*name == '\0')
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	for (len = 0; name[len] != '\0'; len++) {
		/* a character at index IFNAMSIZ - 1 leaves no room for the NUL */
		if (len >= IFNAMSIZ - 1)
			return 0;
		/* ctype helpers must not be handed a negative char value */
		if (name[len] == '/' || isspace((unsigned char)name[len]))
			return 0;
	}
	return 1;
}
769
770 /**
771 * __dev_alloc_name - allocate a name for a device
772 * @net: network namespace to allocate the device name in
773 * @name: name format string
774 * @buf: scratch buffer and result name string
775 *
776 * Passed a format string - eg "lt%d" it will try and find a suitable
777 * id. It scans list of devices to build up a free map, then chooses
778 * the first empty slot. The caller must hold the dev_base or rtnl lock
779 * while allocating the name and adding the device in order to avoid
780 * duplicates.
781 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
782 * Returns the number of the unit assigned or a negative errno code.
783 */
784
785 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
786 {
787 int i = 0;
788 const char *p;
789 const int max_netdevices = 8*PAGE_SIZE;
790 unsigned long *inuse;
791 struct net_device *d;
792
793 p = strnchr(name, IFNAMSIZ-1, '%');
794 if (p) {
795 /*
796 * Verify the string as this thing may have come from
797 * the user. There must be either one "%d" and no other "%"
798 * characters.
799 */
800 if (p[1] != 'd' || strchr(p + 2, '%'))
801 return -EINVAL;
802
803 /* Use one page as a bit array of possible slots */
804 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
805 if (!inuse)
806 return -ENOMEM;
807
808 for_each_netdev(net, d) {
809 if (!sscanf(d->name, name, &i))
810 continue;
811 if (i < 0 || i >= max_netdevices)
812 continue;
813
814 /* avoid cases where sscanf is not exact inverse of printf */
815 snprintf(buf, IFNAMSIZ, name, i);
816 if (!strncmp(buf, d->name, IFNAMSIZ))
817 set_bit(i, inuse);
818 }
819
820 i = find_first_zero_bit(inuse, max_netdevices);
821 free_page((unsigned long) inuse);
822 }
823
824 snprintf(buf, IFNAMSIZ, name, i);
825 if (!__dev_get_by_name(net, buf))
826 return i;
827
828 /* It is possible to run out of possible slots
829 * when the name is long and there isn't enough space left
830 * for the digits, or if all bits are used.
831 */
832 return -ENFILE;
833 }
834
835 /**
836 * dev_alloc_name - allocate a name for a device
837 * @dev: device
838 * @name: name format string
839 *
840 * Passed a format string - eg "lt%d" it will try and find a suitable
841 * id. It scans list of devices to build up a free map, then chooses
842 * the first empty slot. The caller must hold the dev_base or rtnl lock
843 * while allocating the name and adding the device in order to avoid
844 * duplicates.
845 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
846 * Returns the number of the unit assigned or a negative errno code.
847 */
848
849 int dev_alloc_name(struct net_device *dev, const char *name)
850 {
851 char buf[IFNAMSIZ];
852 struct net *net;
853 int ret;
854
855 BUG_ON(!dev->nd_net);
856 net = dev->nd_net;
857 ret = __dev_alloc_name(net, name, buf);
858 if (ret >= 0)
859 strlcpy(dev->name, buf, IFNAMSIZ);
860 return ret;
861 }
862
863
864 /**
865 * dev_change_name - change name of a device
866 * @dev: device
867 * @newname: name (or format string) must be at least IFNAMSIZ
868 *
869 * Change name of a device, can pass format strings "eth%d".
870 * for wildcarding.
871 */
872 int dev_change_name(struct net_device *dev, char *newname)
873 {
874 char oldname[IFNAMSIZ];
875 int err = 0;
876 int ret;
877 struct net *net;
878
879 ASSERT_RTNL();
880 BUG_ON(!dev->nd_net);
881
882 net = dev->nd_net;
883 if (dev->flags & IFF_UP)
884 return -EBUSY;
885
886 if (!dev_valid_name(newname))
887 return -EINVAL;
888
889 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
890 return 0;
891
892 memcpy(oldname, dev->name, IFNAMSIZ);
893
894 if (strchr(newname, '%')) {
895 err = dev_alloc_name(dev, newname);
896 if (err < 0)
897 return err;
898 strcpy(newname, dev->name);
899 }
900 else if (__dev_get_by_name(net, newname))
901 return -EEXIST;
902 else
903 strlcpy(dev->name, newname, IFNAMSIZ);
904
905 rollback:
906 device_rename(&dev->dev, dev->name);
907
908 write_lock_bh(&dev_base_lock);
909 hlist_del(&dev->name_hlist);
910 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
911 write_unlock_bh(&dev_base_lock);
912
913 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
914 ret = notifier_to_errno(ret);
915
916 if (ret) {
917 if (err) {
918 printk(KERN_ERR
919 "%s: name change rollback failed: %d.\n",
920 dev->name, ret);
921 } else {
922 err = ret;
923 memcpy(dev->name, oldname, IFNAMSIZ);
924 goto rollback;
925 }
926 }
927
928 return err;
929 }
930
931 /**
932 * netdev_features_change - device changes features
933 * @dev: device to cause notification
934 *
935 * Called to indicate a device has changed features.
936 */
937 void netdev_features_change(struct net_device *dev)
938 {
939 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
940 }
941 EXPORT_SYMBOL(netdev_features_change);
942
943 /**
944 * netdev_state_change - device changes state
945 * @dev: device to cause notification
946 *
947 * Called to indicate a device has changed state. This function calls
948 * the notifier chains for netdev_chain and sends a NEWLINK message
949 * to the routing socket.
950 */
951 void netdev_state_change(struct net_device *dev)
952 {
953 if (dev->flags & IFF_UP) {
954 call_netdevice_notifiers(NETDEV_CHANGE, dev);
955 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
956 }
957 }
958
959 /**
960 * dev_load - load a network module
961 * @net: the applicable net namespace
962 * @name: name of interface
963 *
964 * If a network interface is not present and the process has suitable
965 * privileges this function loads the module. If module loading is not
966 * available in this kernel then it becomes a nop.
967 */
968
969 void dev_load(struct net *net, const char *name)
970 {
971 struct net_device *dev;
972
973 read_lock(&dev_base_lock);
974 dev = __dev_get_by_name(net, name);
975 read_unlock(&dev_base_lock);
976
977 if (!dev && capable(CAP_SYS_MODULE))
978 request_module("%s", name);
979 }
980
981 /**
982 * dev_open - prepare an interface for use.
983 * @dev: device to open
984 *
985 * Takes a device from down to up state. The device's private open
986 * function is invoked and then the multicast lists are loaded. Finally
987 * the device is moved into the up state and a %NETDEV_UP message is
988 * sent to the netdev notifier chain.
989 *
990 * Calling this function on an active interface is a nop. On a failure
991 * a negative errno code is returned.
992 */
993 int dev_open(struct net_device *dev)
994 {
995 int ret = 0;
996
997 /*
998 * Is it already up?
999 */
1000
1001 if (dev->flags & IFF_UP)
1002 return 0;
1003
1004 /*
1005 * Is it even present?
1006 */
1007 if (!netif_device_present(dev))
1008 return -ENODEV;
1009
1010 /*
1011 * Call device private open method
1012 */
1013 set_bit(__LINK_STATE_START, &dev->state);
1014
1015 if (dev->validate_addr)
1016 ret = dev->validate_addr(dev);
1017
1018 if (!ret && dev->open)
1019 ret = dev->open(dev);
1020
1021 /*
1022 * If it went open OK then:
1023 */
1024
1025 if (ret)
1026 clear_bit(__LINK_STATE_START, &dev->state);
1027 else {
1028 /*
1029 * Set the flags.
1030 */
1031 dev->flags |= IFF_UP;
1032
1033 /*
1034 * Initialize multicasting status
1035 */
1036 dev_set_rx_mode(dev);
1037
1038 /*
1039 * Wakeup transmit queue engine
1040 */
1041 dev_activate(dev);
1042
1043 /*
1044 * ... and announce new interface.
1045 */
1046 call_netdevice_notifiers(NETDEV_UP, dev);
1047 }
1048
1049 return ret;
1050 }
1051
1052 /**
1053 * dev_close - shutdown an interface.
1054 * @dev: device to shutdown
1055 *
1056 * This function moves an active device into down state. A
1057 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1058 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1059 * chain.
1060 */
1061 int dev_close(struct net_device *dev)
1062 {
1063 might_sleep();
1064
1065 if (!(dev->flags & IFF_UP))
1066 return 0;
1067
1068 /*
1069 * Tell people we are going down, so that they can
1070 * prepare to death, when device is still operating.
1071 */
1072 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1073
1074 clear_bit(__LINK_STATE_START, &dev->state);
1075
1076 /* Synchronize to scheduled poll. We cannot touch poll list,
1077 * it can be even on different cpu. So just clear netif_running().
1078 *
1079 * dev->stop() will invoke napi_disable() on all of it's
1080 * napi_struct instances on this device.
1081 */
1082 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1083
1084 dev_deactivate(dev);
1085
1086 /*
1087 * Call the device specific close. This cannot fail.
1088 * Only if device is UP
1089 *
1090 * We allow it to be called even after a DETACH hot-plug
1091 * event.
1092 */
1093 if (dev->stop)
1094 dev->stop(dev);
1095
1096 /*
1097 * Device is now down.
1098 */
1099
1100 dev->flags &= ~IFF_UP;
1101
1102 /*
1103 * Tell people we are down
1104 */
1105 call_netdevice_notifiers(NETDEV_DOWN, dev);
1106
1107 return 0;
1108 }
1109
1110
/*
 * Starts out non-zero.  While it is non-zero,
 * register_netdevice_notifier() links the notifier into the chain but
 * does not replay REGISTER/UP events for existing devices.
 * NOTE(review): presumably cleared once network device initialisation
 * has finished -- the store is not visible in this part of the file.
 */
static int dev_boot_phase = 1;
1112
1113 /*
1114 * Device change register/unregister. These are not inline or static
1115 * as we export them to the world.
1116 */
1117
1118 /**
1119 * register_netdevice_notifier - register a network notifier block
1120 * @nb: notifier
1121 *
1122 * Register a notifier to be called when network device events occur.
1123 * The notifier passed is linked into the kernel structures and must
1124 * not be reused until it has been unregistered. A negative errno code
1125 * is returned on a failure.
1126 *
1127 * When registered all registration and up events are replayed
1128 * to the new notifier to allow device to have a race free
1129 * view of the network device list.
1130 */
1131
1132 int register_netdevice_notifier(struct notifier_block *nb)
1133 {
1134 struct net_device *dev;
1135 struct net_device *last;
1136 struct net *net;
1137 int err;
1138
1139 rtnl_lock();
1140 err = raw_notifier_chain_register(&netdev_chain, nb);
1141 if (err)
1142 goto unlock;
1143 if (dev_boot_phase)
1144 goto unlock;
1145 for_each_net(net) {
1146 for_each_netdev(net, dev) {
1147 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1148 err = notifier_to_errno(err);
1149 if (err)
1150 goto rollback;
1151
1152 if (!(dev->flags & IFF_UP))
1153 continue;
1154
1155 nb->notifier_call(nb, NETDEV_UP, dev);
1156 }
1157 }
1158
1159 unlock:
1160 rtnl_unlock();
1161 return err;
1162
1163 rollback:
1164 last = dev;
1165 for_each_net(net) {
1166 for_each_netdev(net, dev) {
1167 if (dev == last)
1168 break;
1169
1170 if (dev->flags & IFF_UP) {
1171 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1172 nb->notifier_call(nb, NETDEV_DOWN, dev);
1173 }
1174 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1175 }
1176 }
1177
1178 raw_notifier_chain_unregister(&netdev_chain, nb);
1179 goto unlock;
1180 }
1181
1182 /**
1183 * unregister_netdevice_notifier - unregister a network notifier block
1184 * @nb: notifier
1185 *
1186 * Unregister a notifier previously registered by
1187 * register_netdevice_notifier(). The notifier is unlinked into the
1188 * kernel structures and may then be reused. A negative errno code
1189 * is returned on a failure.
1190 */
1191
1192 int unregister_netdevice_notifier(struct notifier_block *nb)
1193 {
1194 int err;
1195
1196 rtnl_lock();
1197 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1198 rtnl_unlock();
1199 return err;
1200 }
1201
1202 /**
1203 * call_netdevice_notifiers - call all network notifier blocks
1204 * @val: value passed unmodified to notifier function
1205 * @dev: net_device pointer passed unmodified to notifier function
1206 *
1207 * Call all network notifier blocks. Parameters and return value
1208 * are as for raw_notifier_call_chain().
1209 */
1210
1211 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1212 {
1213 return raw_notifier_call_chain(&netdev_chain, val, dev);
1214 }
1215
/*
 * When > 0 there are consumers of rx skb time stamps.
 * Incremented by net_enable_timestamp(), decremented by
 * net_disable_timestamp() and tested by net_timestamp().
 */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
1218
/* Register one more consumer of skb time stamps. */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
1223
/* Drop one consumer of skb time stamps (pairs with net_enable_timestamp()). */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
1228
1229 static inline void net_timestamp(struct sk_buff *skb)
1230 {
1231 if (atomic_read(&netstamp_needed))
1232 __net_timestamp(skb);
1233 else
1234 skb->tstamp.tv64 = 0;
1235 }
1236
1237 /*
1238 * Support routine. Sends outgoing frames to any network
1239 * taps currently in use.
1240 */
1241
1242 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1243 {
1244 struct packet_type *ptype;
1245
1246 net_timestamp(skb);
1247
1248 rcu_read_lock();
1249 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1250 /* Never send packets back to the socket
1251 * they originated from - MvS (miquels@drinkel.ow.org)
1252 */
1253 if ((ptype->dev == dev || !ptype->dev) &&
1254 (ptype->af_packet_priv == NULL ||
1255 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1256 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1257 if (!skb2)
1258 break;
1259
1260 /* skb->nh should be correctly
1261 set by sender, so that the second statement is
1262 just protection against buggy protocols.
1263 */
1264 skb_reset_mac_header(skb2);
1265
1266 if (skb_network_header(skb2) < skb2->data ||
1267 skb2->network_header > skb2->tail) {
1268 if (net_ratelimit())
1269 printk(KERN_CRIT "protocol %04x is "
1270 "buggy, dev %s\n",
1271 skb2->protocol, dev->name);
1272 skb_reset_network_header(skb2);
1273 }
1274
1275 skb2->transport_header = skb2->network_header;
1276 skb2->pkt_type = PACKET_OUTGOING;
1277 ptype->func(skb2, skb->dev, ptype, skb->dev);
1278 }
1279 }
1280 rcu_read_unlock();
1281 }
1282
1283
1284 void __netif_schedule(struct net_device *dev)
1285 {
1286 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1287 unsigned long flags;
1288 struct softnet_data *sd;
1289
1290 local_irq_save(flags);
1291 sd = &__get_cpu_var(softnet_data);
1292 dev->next_sched = sd->output_queue;
1293 sd->output_queue = dev;
1294 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1295 local_irq_restore(flags);
1296 }
1297 }
1298 EXPORT_SYMBOL(__netif_schedule);
1299
1300 void dev_kfree_skb_irq(struct sk_buff *skb)
1301 {
1302 if (atomic_dec_and_test(&skb->users)) {
1303 struct softnet_data *sd;
1304 unsigned long flags;
1305
1306 local_irq_save(flags);
1307 sd = &__get_cpu_var(softnet_data);
1308 skb->next = sd->completion_queue;
1309 sd->completion_queue = skb;
1310 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1311 local_irq_restore(flags);
1312 }
1313 }
1314 EXPORT_SYMBOL(dev_kfree_skb_irq);
1315
/*
 * Free @skb from any context: use the deferred dev_kfree_skb_irq()
 * in hard interrupt context or with interrupts disabled, and
 * dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1324
1325
1326 /**
1327 * netif_device_detach - mark device as removed
1328 * @dev: network device
1329 *
1330 * Mark device as removed from system and therefore no longer available.
1331 */
1332 void netif_device_detach(struct net_device *dev)
1333 {
1334 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1335 netif_running(dev)) {
1336 netif_stop_queue(dev);
1337 }
1338 }
1339 EXPORT_SYMBOL(netif_device_detach);
1340
1341 /**
1342 * netif_device_attach - mark device as attached
1343 * @dev: network device
1344 *
1345 * Mark device as attached from system and restart if needed.
1346 */
1347 void netif_device_attach(struct net_device *dev)
1348 {
1349 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1350 netif_running(dev)) {
1351 netif_wake_queue(dev);
1352 __netdev_watchdog_up(dev);
1353 }
1354 }
1355 EXPORT_SYMBOL(netif_device_attach);
1356
1357
1358 /*
1359 * Invalidate hardware checksum when packet is to be mangled, and
1360 * complete checksum manually on outgoing path.
1361 */
1362 int skb_checksum_help(struct sk_buff *skb)
1363 {
1364 __wsum csum;
1365 int ret = 0, offset;
1366
1367 if (skb->ip_summed == CHECKSUM_COMPLETE)
1368 goto out_set_summed;
1369
1370 if (unlikely(skb_shinfo(skb)->gso_size)) {
1371 /* Let GSO fix up the checksum. */
1372 goto out_set_summed;
1373 }
1374
1375 offset = skb->csum_start - skb_headroom(skb);
1376 BUG_ON(offset >= skb_headlen(skb));
1377 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1378
1379 offset += skb->csum_offset;
1380 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1381
1382 if (skb_cloned(skb) &&
1383 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1384 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1385 if (ret)
1386 goto out;
1387 }
1388
1389 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1390 out_set_summed:
1391 skb->ip_summed = CHECKSUM_NONE;
1392 out:
1393 return ret;
1394 }
1395
1396 /**
1397 * skb_gso_segment - Perform segmentation on skb.
1398 * @skb: buffer to segment
1399 * @features: features for the output path (see dev->features)
1400 *
1401 * This function segments the given skb and returns a list of segments.
1402 *
1403 * It may return NULL if the skb requires no segmentation. This is
1404 * only possible when GSO is used for verifying header integrity.
1405 */
1406 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1407 {
1408 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1409 struct packet_type *ptype;
1410 __be16 type = skb->protocol;
1411 int err;
1412
1413 BUG_ON(skb_shinfo(skb)->frag_list);
1414
1415 skb_reset_mac_header(skb);
1416 skb->mac_len = skb->network_header - skb->mac_header;
1417 __skb_pull(skb, skb->mac_len);
1418
1419 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1420 if (skb_header_cloned(skb) &&
1421 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1422 return ERR_PTR(err);
1423 }
1424
1425 rcu_read_lock();
1426 list_for_each_entry_rcu(ptype,
1427 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1428 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1429 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1430 err = ptype->gso_send_check(skb);
1431 segs = ERR_PTR(err);
1432 if (err || skb_gso_ok(skb, features))
1433 break;
1434 __skb_push(skb, (skb->data -
1435 skb_network_header(skb)));
1436 }
1437 segs = ptype->gso_segment(skb, features);
1438 break;
1439 }
1440 }
1441 rcu_read_unlock();
1442
1443 __skb_push(skb, skb->data - skb_mac_header(skb));
1444
1445 return segs;
1446 }
1447
1448 EXPORT_SYMBOL(skb_gso_segment);
1449
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate limited message naming the device and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1462
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise.  Without CONFIG_HIGHMEM
 * the answer is always 0.
 *
 * Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		int idx = 0;

		while (idx < skb_shinfo(skb)->nr_frags) {
			if (PageHighMem(skb_shinfo(skb)->frags[idx].page))
				return 1;
			idx++;
		}
	}

#endif
	return 0;
}
1483
/*
 * Private data kept in skb->cb while a segment list hangs off an skb:
 * the skb's original destructor, saved by dev_gso_segment() when it
 * installs dev_gso_skb_destructor() in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1489
1490 static void dev_gso_skb_destructor(struct sk_buff *skb)
1491 {
1492 struct dev_gso_cb *cb;
1493
1494 do {
1495 struct sk_buff *nskb = skb->next;
1496
1497 skb->next = nskb->next;
1498 nskb->next = NULL;
1499 kfree_skb(nskb);
1500 } while (skb->next);
1501
1502 cb = DEV_GSO_CB(skb);
1503 if (cb->destructor)
1504 cb->destructor(skb);
1505 }
1506
1507 /**
1508 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1509 * @skb: buffer to segment
1510 *
1511 * This function segments the given skb and stores the list of segments
1512 * in skb->next.
1513 */
1514 static int dev_gso_segment(struct sk_buff *skb)
1515 {
1516 struct net_device *dev = skb->dev;
1517 struct sk_buff *segs;
1518 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1519 NETIF_F_SG : 0);
1520
1521 segs = skb_gso_segment(skb, features);
1522
1523 /* Verifying header integrity only. */
1524 if (!segs)
1525 return 0;
1526
1527 if (unlikely(IS_ERR(segs)))
1528 return PTR_ERR(segs);
1529
1530 skb->next = segs;
1531 DEV_GSO_CB(skb)->destructor = skb->destructor;
1532 skb->destructor = dev_gso_skb_destructor;
1533
1534 return 0;
1535 }
1536
/*
 * Hand @skb to the driver of @dev.
 *
 * An skb without a segment list (skb->next == NULL) is first shown to
 * the taps, segmented in software if netif_needs_gso() says so, and
 * otherwise passed straight to dev->hard_start_xmit(), whose return
 * value is returned.
 *
 * An skb that carries a segment list has its segments transmitted one
 * by one.  If the driver refuses a segment, the segment is put back at
 * the head of the list and the driver's return code is returned; if
 * the queue gets stopped while segments remain, NETDEV_TX_BUSY is
 * returned.  In both cases the remaining segments stay on skb->next.
 * Once all segments are sent, or when segmentation failed, @skb itself
 * is freed and 0 is returned.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		/* Only bother the taps when at least one is registered. */
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			/* Segmentation produced a list: send it below. */
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		/* Detach the first segment before giving it to the driver. */
		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Not taken: relink it at the head of the list. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely((netif_queue_stopped(dev) ||
			      netif_subqueue_stopped(dev, skb)) &&
			     skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/*
	 * All segments are gone: restore the destructor saved by
	 * dev_gso_segment() before the skb is freed.
	 */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1578
1579 /**
1580 * dev_queue_xmit - transmit a buffer
1581 * @skb: buffer to transmit
1582 *
1583 * Queue a buffer for transmission to a network device. The caller must
1584 * have set the device and priority and built the buffer before calling
1585 * this function. The function can be called from an interrupt.
1586 *
1587 * A negative errno code is returned on a failure. A success does not
1588 * guarantee the frame will be transmitted as it may be dropped due
1589 * to congestion or traffic shaping.
1590 *
1591 * -----------------------------------------------------------------------------------
1592 * I notice this method can also return errors from the queue disciplines,
1593 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1594 * be positive.
1595 *
1596 * Regardless of the return value, the skb is consumed, so it is currently
1597 * difficult to retry a send to this method. (You can bump the ref count
1598 * before sending to hold a reference for retry if you are careful.)
1599 *
1600 * When calling this method, interrupts MUST be enabled. This is because
1601 * the BH enable code must have IRQs enabled so that it will not deadlock.
1602 * --BLG
1603 */
1604
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	/* Reported when linearizing or checksumming fails below. */
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Flatten a frag_list the device cannot handle. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));

		if (!(dev->features & NETIF_F_GEN_CSUM) &&
		    !((dev->features & NETIF_F_IP_CSUM) &&
		      skb->protocol == htons(ETH_P_IP)) &&
		    !((dev->features & NETIF_F_IPV6_CSUM) &&
		      skb->protocol == htons(ETH_P_IPV6)))
			if (skb_checksum_help(skb))
				goto out_kfree_skb;
	}

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);
		/* Re-read the qdisc now that queue_lock is held. */
		q = dev->qdisc;
		if (q->enqueue) {
			/* reset queue_mapping to zero */
			skb_set_queue_mapping(skb, 0);
			rc = q->enqueue(skb, q);
			qdisc_run(dev);
			spin_unlock(&dev->queue_lock);

			/* NET_XMIT_BYPASS is reported to the caller as success. */
			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
			goto out;
		}
		spin_unlock(&dev->queue_lock);
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {

		/* The current task already owning the xmit lock means recursion. */
		if (dev->xmit_lock_owner != (void *)current) {

			HARD_TX_LOCK(dev);

			if (!netif_queue_stopped(dev) &&
			    !netif_subqueue_stopped(dev, skb)) {
				rc = 0;
				/* A zero return means the skb was consumed. */
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/*
	 * Falling through to here means the packet was not sent; it is
	 * freed below and -ENETDOWN is reported.
	 */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	/* The early gotos above arrive here without the RCU-bh lock held. */
out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1735
1736
1737 /*=======================================================================
1738 Receiver routines
1739 =======================================================================*/
1740
/* netif_rx() drops packets once the per-cpu input queue is longer than this. */
int netdev_max_backlog __read_mostly = 1000;
/* Initial budget of one net_rx_action() run, decremented by the work done. */
int netdev_budget __read_mostly = 300;
/* process_backlog() copies this into the weight of the backlog napi. */
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-cpu receive counters (total, dropped, time_squeeze are used here). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1746
1747
1748 /**
1749 * netif_rx - post buffer to the network code
1750 * @skb: buffer to post
1751 *
1752 * This function receives a packet from a device driver and queues it for
1753 * the upper (protocol) levels to process. It always succeeds. The buffer
1754 * may be dropped during processing for congestion control or by the
1755 * protocol layers.
1756 *
1757 * return values:
1758 * NET_RX_SUCCESS (no congestion)
1759 * NET_RX_DROP (packet was dropped)
1760 *
1761 */
1762
1763 int netif_rx(struct sk_buff *skb)
1764 {
1765 struct softnet_data *queue;
1766 unsigned long flags;
1767
1768 /* if netpoll wants it, pretend we never saw it */
1769 if (netpoll_rx(skb))
1770 return NET_RX_DROP;
1771
1772 if (!skb->tstamp.tv64)
1773 net_timestamp(skb);
1774
1775 /*
1776 * The code is rearranged so that the path is the most
1777 * short when CPU is congested, but is still operating.
1778 */
1779 local_irq_save(flags);
1780 queue = &__get_cpu_var(softnet_data);
1781
1782 __get_cpu_var(netdev_rx_stat).total++;
1783 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1784 if (queue->input_pkt_queue.qlen) {
1785 enqueue:
1786 dev_hold(skb->dev);
1787 __skb_queue_tail(&queue->input_pkt_queue, skb);
1788 local_irq_restore(flags);
1789 return NET_RX_SUCCESS;
1790 }
1791
1792 napi_schedule(&queue->backlog);
1793 goto enqueue;
1794 }
1795
1796 __get_cpu_var(netdev_rx_stat).dropped++;
1797 local_irq_restore(flags);
1798
1799 kfree_skb(skb);
1800 return NET_RX_DROP;
1801 }
1802
/*
 * Same as netif_rx(), but afterwards runs any pending softirqs
 * with preemption disabled.  Returns the netif_rx() result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret = netif_rx(skb);

	preempt_disable();
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1817
1818 static inline struct net_device *skb_bond(struct sk_buff *skb)
1819 {
1820 struct net_device *dev = skb->dev;
1821
1822 if (dev->master) {
1823 if (skb_bond_should_drop(skb)) {
1824 kfree_skb(skb);
1825 return NULL;
1826 }
1827 skb->dev = dev->master;
1828 }
1829
1830 return dev;
1831 }
1832
1833
1834 static void net_tx_action(struct softirq_action *h)
1835 {
1836 struct softnet_data *sd = &per_cpu(softnet_data,
1837 raw_smp_processor_id());
1838
1839 if (sd->completion_queue) {
1840 struct sk_buff *clist;
1841
1842 local_irq_disable();
1843 clist = sd->completion_queue;
1844 sd->completion_queue = NULL;
1845 local_irq_enable();
1846
1847 while (clist) {
1848 struct sk_buff *skb = clist;
1849 clist = clist->next;
1850
1851 BUG_TRAP(!atomic_read(&skb->users));
1852 __kfree_skb(skb);
1853 /*
1854 * Safe to reschedule - the list is private
1855 * at this point.
1856 */
1857 cond_resched_softirq_context();
1858 }
1859 }
1860
1861 if (sd->output_queue) {
1862 struct net_device *head;
1863
1864 local_irq_disable();
1865 head = sd->output_queue;
1866 sd->output_queue = NULL;
1867 local_irq_enable();
1868
1869 while (head) {
1870 struct net_device *dev = head;
1871 head = head->next_sched;
1872
1873 smp_mb__before_clear_bit();
1874 clear_bit(__LINK_STATE_SCHED, &dev->state);
1875
1876 /*
1877 * We are executing in softirq context here, and
1878 * if softirqs are preemptible, we must avoid
1879 * infinite reactivation of the softirq by
1880 * either the tx handler, or by netif_schedule().
1881 * (it would result in an infinitely looping
1882 * softirq context)
1883 * So we take the spinlock unconditionally.
1884 */
1885 #ifdef CONFIG_PREEMPT_SOFTIRQS
1886 spin_lock(&dev->queue_lock);
1887 qdisc_run(dev);
1888 spin_unlock(&dev->queue_lock);
1889 #else
1890 if (spin_trylock(&dev->queue_lock)) {
1891 qdisc_run(dev);
1892 spin_unlock(&dev->queue_lock);
1893 } else {
1894 netif_schedule(dev);
1895 }
1896 #endif
1897 }
1898 }
1899 }
1900
1901 static inline int deliver_skb(struct sk_buff *skb,
1902 struct packet_type *pt_prev,
1903 struct net_device *orig_dev)
1904 {
1905 atomic_inc(&skb->users);
1906 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1907 }
1908
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * Bridging hook, called by handle_bridge() for packets that arrive on
 * a device with a bridge port.  Returns NULL if the packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Give the bridge a chance to take @skb.  Loopback packets and packets
 * from devices without a bridge port are returned untouched.  A handler
 * still pending in *@pt_prev is served before the bridge hook runs.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1942
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
/* Hook called by handle_macvlan() for devices that have a macvlan port. */
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Give macvlan a chance to take @skb.  Packets from devices without a
 * macvlan port are returned untouched.  A handler still pending in
 * *@pt_prev is served before the hook runs.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (!skb->dev->macvlan_port)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1964
1965 #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
 * instructions (a compare and 2 extra stores) right now if we don't
 * have it on but do have CONFIG_NET_CLS_ACT.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
1974 static int ing_filter(struct sk_buff *skb)
1975 {
1976 struct Qdisc *q;
1977 struct net_device *dev = skb->dev;
1978 int result = TC_ACT_OK;
1979 u32 ttl = G_TC_RTTL(skb->tc_verd);
1980
1981 if (MAX_RED_LOOP < ttl++) {
1982 printk(KERN_WARNING
1983 "Redir loop detected Dropping packet (%d->%d)\n",
1984 skb->iif, dev->ifindex);
1985 return TC_ACT_SHOT;
1986 }
1987
1988 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1989 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1990
1991 spin_lock(&dev->ingress_lock);
1992 if ((q = dev->qdisc_ingress) != NULL)
1993 result = q->enqueue(skb, q);
1994 spin_unlock(&dev->ingress_lock);
1995
1996 return result;
1997 }
1998
1999 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2000 struct packet_type **pt_prev,
2001 int *ret, struct net_device *orig_dev)
2002 {
2003 if (!skb->dev->qdisc_ingress)
2004 goto out;
2005
2006 if (*pt_prev) {
2007 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2008 *pt_prev = NULL;
2009 } else {
2010 /* Huh? Why does turning on AF_PACKET affect this? */
2011 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2012 }
2013
2014 switch (ing_filter(skb)) {
2015 case TC_ACT_SHOT:
2016 case TC_ACT_STOLEN:
2017 kfree_skb(skb);
2018 return NULL;
2019 }
2020
2021 out:
2022 skb->tc_verd = 0;
2023 return skb;
2024 }
2025 #endif
2026
2027 /**
2028 * netif_receive_skb - process receive buffer from network
2029 * @skb: buffer to process
2030 *
2031 * netif_receive_skb() is the main receive data processing function.
2032 * It always succeeds. The buffer may be dropped during processing
2033 * for congestion control or by the protocol layers.
2034 *
2035 * This function may only be called from softirq context and interrupts
2036 * should be enabled.
2037 *
2038 * Return values (usually ignored):
2039 * NET_RX_SUCCESS: no congestion
2040 * NET_RX_DROP: packet was dropped
2041 */
int netif_receive_skb(struct sk_buff *skb)
{
	/*
	 * pt_prev is the handler that matched last but has not been called
	 * yet.  Delivery always lags one handler behind, so every handler
	 * except the final one goes through deliver_skb() (which takes an
	 * extra reference) and the final one is called directly.
	 */
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	__be16 type;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/* Record the incoming interface unless it is already set. */
	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/* May redirect skb->dev to the master; NULL means the skb was freed. */
	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear it and skip both the taps and ingress filtering. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Handlers on ptype_all see the packet regardless of its protocol. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Each of these returns NULL when it has consumed the packet. */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Protocol handlers: same type, and either unbound or bound to skb->dev. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler: called directly, without an extra reference. */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
2128
2129 static int process_backlog(struct napi_struct *napi, int quota)
2130 {
2131 int work = 0;
2132 struct softnet_data *queue;
2133 unsigned long start_time = jiffies;
2134
2135 queue = &per_cpu(softnet_data, raw_smp_processor_id());
2136 napi->weight = weight_p;
2137 do {
2138 struct sk_buff *skb;
2139 struct net_device *dev;
2140
2141 local_irq_disable();
2142 skb = __skb_dequeue(&queue->input_pkt_queue);
2143 if (!skb) {
2144 __napi_complete(napi);
2145 local_irq_enable();
2146 break;
2147 }
2148
2149 local_irq_enable();
2150
2151 dev = skb->dev;
2152
2153 netif_receive_skb(skb);
2154
2155 dev_put(dev);
2156 } while (++work < quota && jiffies == start_time);
2157
2158 return work;
2159 }
2160
2161 /**
2162 * __napi_schedule - schedule for receive
2163 * @n: entry to schedule
2164 *
2165 * The entry's receive function will be scheduled to run
2166 */
2167 void __napi_schedule(struct napi_struct *n)
2168 {
2169 unsigned long flags;
2170
2171 local_irq_save(flags);
2172 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2173 raise_softirq_irqoff(NET_RX_SOFTIRQ);
2174 local_irq_restore(flags);
2175 }
2176 EXPORT_SYMBOL(__napi_schedule);
2177
2178
/*
 * RX softirq handler: polls the napi instances on this cpu's poll list
 * until the list is empty, the budget (netdev_budget) is used up, or
 * jiffies has advanced.  In the latter two cases the time_squeeze
 * counter is bumped and the softirq is raised again.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	/* Interrupts are off at the top of every iteration. */
	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhausted then punt.
		 *
		 * Note that this is a slight policy change from the
		 * previous NAPI code, which would allow up to 2
		 * jiffies to pass before breaking out.  The test
		 * used to be "jiffies - start_time > 1".
		 */
		if (unlikely(budget <= 0 || jiffies != start_time))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state))
			work = n->poll(n, weight);

		/* A poll function must not report more work than its weight. */
		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n)))
				__napi_complete(n);
			else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	/* Reached with interrupts disabled from both the loop and softnet_break. */
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	if (!cpus_empty(net_dma.channel_mask)) {
		int chan_idx;
		for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
			struct dma_chan *chan = net_dma.channels[chan_idx];
			if (chan)
				dma_async_memcpy_issue_pending(chan);
		}
	}
#endif

	return;

softnet_break:
	/* Out of budget or time: count it and ask for another softirq run. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}
2270
2271 static gifconf_func_t * gifconf_list [NPROTO];
2272
2273 /**
2274 * register_gifconf - register a SIOCGIF handler
2275 * @family: Address family
2276 * @gifconf: Function handler
2277 *
2278 * Register protocol dependent address dumping routines. The handler
2279 * that is passed must not be freed or reused until it has been replaced
2280 * by another handler.
2281 */
2282 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2283 {
2284 if (family >= NPROTO)
2285 return -EINVAL;
2286 gifconf_list[family] = gifconf;
2287 return 0;
2288 }
2289
2290
2291 /*
2292 * Map an interface index to its name (SIOCGIFNAME)
2293 */
2294
2295 /*
2296 * We need this ioctl for efficient implementation of the
2297 * if_indextoname() function required by the IPv6 API. Without
2298 * it, we would have to search all the interfaces to find a
2299 * match. --pb
2300 */
2301
2302 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2303 {
2304 struct net_device *dev;
2305 struct ifreq ifr;
2306
2307 /*
2308 * Fetch the caller's info block.
2309 */
2310
2311 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2312 return -EFAULT;
2313
2314 read_lock(&dev_base_lock);
2315 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2316 if (!dev) {
2317 read_unlock(&dev_base_lock);
2318 return -ENODEV;
2319 }
2320
2321 strcpy(ifr.ifr_name, dev->name);
2322 read_unlock(&dev_base_lock);
2323
2324 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2325 return -EFAULT;
2326 return 0;
2327 }
2328
2329 /*
2330 * Perform a SIOCGIFCONF call. This structure will change
2331 * size eventually, and there is nothing I can do about it.
2332 * Thus we will need a 'compatibility mode'.
2333 */
2334
2335 static int dev_ifconf(struct net *net, char __user *arg)
2336 {
2337 struct ifconf ifc;
2338 struct net_device *dev;
2339 char __user *pos;
2340 int len;
2341 int total;
2342 int i;
2343
2344 /*
2345 * Fetch the caller's info block.
2346 */
2347
2348 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2349 return -EFAULT;
2350
2351 pos = ifc.ifc_buf;
2352 len = ifc.ifc_len;
2353
2354 /*
2355 * Loop over the interfaces, and write an info block for each.
2356 */
2357
2358 total = 0;
2359 for_each_netdev(net, dev) {
2360 for (i = 0; i < NPROTO; i++) {
2361 if (gifconf_list[i]) {
2362 int done;
2363 if (!pos)
2364 done = gifconf_list[i](dev, NULL, 0);
2365 else
2366 done = gifconf_list[i](dev, pos + total,
2367 len - total);
2368 if (done < 0)
2369 return -EFAULT;
2370 total += done;
2371 }
2372 }
2373 }
2374
2375 /*
2376 * All done. Write the updated control block back to the caller.
2377 */
2378 ifc.ifc_len = total;
2379
2380 /*
2381 * Both BSD and Solaris return 0 here, so we do too.
2382 */
2383 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2384 }
2385
2386 #ifdef CONFIG_PROC_FS
2387 /*
2388 * This is invoked by the /proc filesystem handler to display a device
2389 * in detail.
2390 */
2391 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2392 __acquires(dev_base_lock)
2393 {
2394 struct net *net = seq_file_net(seq);
2395 loff_t off;
2396 struct net_device *dev;
2397
2398 read_lock(&dev_base_lock);
2399 if (!*pos)
2400 return SEQ_START_TOKEN;
2401
2402 off = 1;
2403 for_each_netdev(net, dev)
2404 if (off++ == *pos)
2405 return dev;
2406
2407 return NULL;
2408 }
2409
2410 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2411 {
2412 struct net *net = seq_file_net(seq);
2413 ++*pos;
2414 return v == SEQ_START_TOKEN ?
2415 first_net_device(net) : next_net_device((struct net_device *)v);
2416 }
2417
2418 void dev_seq_stop(struct seq_file *seq, void *v)
2419 __releases(dev_base_lock)
2420 {
2421 read_unlock(&dev_base_lock);
2422 }
2423
2424 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2425 {
2426 struct net_device_stats *stats = dev->get_stats(dev);
2427
2428 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2429 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2430 dev->name, stats->rx_bytes, stats->rx_packets,
2431 stats->rx_errors,
2432 stats->rx_dropped + stats->rx_missed_errors,
2433 stats->rx_fifo_errors,
2434 stats->rx_length_errors + stats->rx_over_errors +
2435 stats->rx_crc_errors + stats->rx_frame_errors,
2436 stats->rx_compressed, stats->multicast,
2437 stats->tx_bytes, stats->tx_packets,
2438 stats->tx_errors, stats->tx_dropped,
2439 stats->tx_fifo_errors, stats->collisions,
2440 stats->tx_carrier_errors +
2441 stats->tx_aborted_errors +
2442 stats->tx_window_errors +
2443 stats->tx_heartbeat_errors,
2444 stats->tx_compressed);
2445 }
2446
2447 /*
2448 * Called from the PROCfs module. This now uses the new arbitrary sized
2449 * /proc/net interface to create /proc/net/dev
2450 */
2451 static int dev_seq_show(struct seq_file *seq, void *v)
2452 {
2453 if (v == SEQ_START_TOKEN)
2454 seq_puts(seq, "Inter-| Receive "
2455 " | Transmit\n"
2456 " face |bytes packets errs drop fifo frame "
2457 "compressed multicast|bytes packets errs "
2458 "drop fifo colls carrier compressed\n");
2459 else
2460 dev_seq_printf_stats(seq, v);
2461 return 0;
2462 }
2463
2464 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2465 {
2466 struct netif_rx_stats *rc = NULL;
2467
2468 while (*pos < NR_CPUS)
2469 if (cpu_online(*pos)) {
2470 rc = &per_cpu(netdev_rx_stat, *pos);
2471 break;
2472 } else
2473 ++*pos;
2474 return rc;
2475 }
2476
2477 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2478 {
2479 return softnet_get_online(pos);
2480 }
2481
2482 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2483 {
2484 ++*pos;
2485 return softnet_get_online(pos);
2486 }
2487
/* seq_file ->stop: nothing to undo, softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
2491
2492 static int softnet_seq_show(struct seq_file *seq, void *v)
2493 {
2494 struct netif_rx_stats *s = v;
2495
2496 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2497 s->total, s->dropped, s->time_squeeze, 0,
2498 0, 0, 0, 0, /* was fastroute */
2499 s->cpu_collision );
2500 return 0;
2501 }
2502
2503 static const struct seq_operations dev_seq_ops = {
2504 .start = dev_seq_start,
2505 .next = dev_seq_next,
2506 .stop = dev_seq_stop,
2507 .show = dev_seq_show,
2508 };
2509
2510 static int dev_seq_open(struct inode *inode, struct file *file)
2511 {
2512 return seq_open_net(inode, file, &dev_seq_ops,
2513 sizeof(struct seq_net_private));
2514 }
2515
2516 static const struct file_operations dev_seq_fops = {
2517 .owner = THIS_MODULE,
2518 .open = dev_seq_open,
2519 .read = seq_read,
2520 .llseek = seq_lseek,
2521 .release = seq_release_net,
2522 };
2523
2524 static const struct seq_operations softnet_seq_ops = {
2525 .start = softnet_seq_start,
2526 .next = softnet_seq_next,
2527 .stop = softnet_seq_stop,
2528 .show = softnet_seq_show,
2529 };
2530
2531 static int softnet_seq_open(struct inode *inode, struct file *file)
2532 {
2533 return seq_open(file, &softnet_seq_ops);
2534 }
2535
2536 static const struct file_operations softnet_seq_fops = {
2537 .owner = THIS_MODULE,
2538 .open = softnet_seq_open,
2539 .read = seq_read,
2540 .llseek = seq_lseek,
2541 .release = seq_release,
2542 };
2543
2544 static void *ptype_get_idx(loff_t pos)
2545 {
2546 struct packet_type *pt = NULL;
2547 loff_t i = 0;
2548 int t;
2549
2550 list_for_each_entry_rcu(pt, &ptype_all, list) {
2551 if (i == pos)
2552 return pt;
2553 ++i;
2554 }
2555
2556 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2557 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2558 if (i == pos)
2559 return pt;
2560 ++i;
2561 }
2562 }
2563 return NULL;
2564 }
2565
2566 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2567 __acquires(RCU)
2568 {
2569 rcu_read_lock();
2570 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2571 }
2572
2573 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2574 {
2575 struct packet_type *pt;
2576 struct list_head *nxt;
2577 int hash;
2578
2579 ++*pos;
2580 if (v == SEQ_START_TOKEN)
2581 return ptype_get_idx(0);
2582
2583 pt = v;
2584 nxt = pt->list.next;
2585 if (pt->type == htons(ETH_P_ALL)) {
2586 if (nxt != &ptype_all)
2587 goto found;
2588 hash = 0;
2589 nxt = ptype_base[0].next;
2590 } else
2591 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2592
2593 while (nxt == &ptype_base[hash]) {
2594 if (++hash >= PTYPE_HASH_SIZE)
2595 return NULL;
2596 nxt = ptype_base[hash].next;
2597 }
2598 found:
2599 return list_entry(nxt, struct packet_type, list);
2600 }
2601
2602 static void ptype_seq_stop(struct seq_file *seq, void *v)
2603 __releases(RCU)
2604 {
2605 rcu_read_unlock();
2606 }
2607
/*
 * Print the address @sym symbolically. With CONFIG_KALLSYMS and a
 * successful lookup the output is "symbol+0xoffset", wrapped as
 * ":module:symbol+0xoffset" when the lookup reports a module name.
 * Otherwise the raw pointer is printed in brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		/* No module name: print neither the name nor the colons. */
		const char *sep = modname ? ":" : "";
		const char *mod = modname ? modname : "";

		seq_printf(seq, "%s%s%s%s+0x%lx", sep, mod, sep,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2632
2633 static int ptype_seq_show(struct seq_file *seq, void *v)
2634 {
2635 struct packet_type *pt = v;
2636
2637 if (v == SEQ_START_TOKEN)
2638 seq_puts(seq, "Type Device Function\n");
2639 else {
2640 if (pt->type == htons(ETH_P_ALL))
2641 seq_puts(seq, "ALL ");
2642 else
2643 seq_printf(seq, "%04x", ntohs(pt->type));
2644
2645 seq_printf(seq, " %-8s ",
2646 pt->dev ? pt->dev->name : "");
2647 ptype_seq_decode(seq, pt->func);
2648 seq_putc(seq, '\n');
2649 }
2650
2651 return 0;
2652 }
2653
2654 static const struct seq_operations ptype_seq_ops = {
2655 .start = ptype_seq_start,
2656 .next = ptype_seq_next,
2657 .stop = ptype_seq_stop,
2658 .show = ptype_seq_show,
2659 };
2660
2661 static int ptype_seq_open(struct inode *inode, struct file *file)
2662 {
2663 return seq_open(file, &ptype_seq_ops);
2664 }
2665
2666 static const struct file_operations ptype_seq_fops = {
2667 .owner = THIS_MODULE,
2668 .open = ptype_seq_open,
2669 .read = seq_read,
2670 .llseek = seq_lseek,
2671 .release = seq_release,
2672 };
2673
2674
2675 static int __net_init dev_proc_net_init(struct net *net)
2676 {
2677 int rc = -ENOMEM;
2678
2679 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2680 goto out;
2681 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2682 goto out_dev;
2683 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2684 goto out_softnet;
2685
2686 if (wext_proc_init(net))
2687 goto out_ptype;
2688 rc = 0;
2689 out:
2690 return rc;
2691 out_ptype:
2692 proc_net_remove(net, "ptype");
2693 out_softnet:
2694 proc_net_remove(net, "softnet_stat");
2695 out_dev:
2696 proc_net_remove(net, "dev");
2697 goto out;
2698 }
2699
2700 static void __net_exit dev_proc_net_exit(struct net *net)
2701 {
2702 wext_proc_exit(net);
2703
2704 proc_net_remove(net, "ptype");
2705 proc_net_remove(net, "softnet_stat");
2706 proc_net_remove(net, "dev");
2707 }
2708
2709 static struct pernet_operations __net_initdata dev_proc_ops = {
2710 .init = dev_proc_net_init,
2711 .exit = dev_proc_net_exit,
2712 };
2713
2714 static int __init dev_proc_init(void)
2715 {
2716 return register_pernet_subsys(&dev_proc_ops);
2717 }
2718 #else
2719 #define dev_proc_init() 0
2720 #endif /* CONFIG_PROC_FS */
2721
2722
2723 /**
2724 * netdev_set_master - set up master/slave pair
2725 * @slave: slave device
2726 * @master: new master device
2727 *
2728 * Changes the master device of the slave. Pass %NULL to break the
2729 * bonding. The caller must hold the RTNL semaphore. On a failure
2730 * a negative errno code is returned. On success the reference counts
2731 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2732 * function returns zero.
2733 */
2734 int netdev_set_master(struct net_device *slave, struct net_device *master)
2735 {
2736 struct net_device *old = slave->master;
2737
2738 ASSERT_RTNL();
2739
2740 if (master) {
2741 if (old)
2742 return -EBUSY;
2743 dev_hold(master);
2744 }
2745
2746 slave->master = master;
2747
2748 synchronize_net();
2749
2750 if (old)
2751 dev_put(old);
2752
2753 if (master)
2754 slave->flags |= IFF_SLAVE;
2755 else
2756 slave->flags &= ~IFF_SLAVE;
2757
2758 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2759 return 0;
2760 }
2761
2762 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2763 {
2764 unsigned short old_flags = dev->flags;
2765
2766 ASSERT_RTNL();
2767
2768 if ((dev->promiscuity += inc) == 0)
2769 dev->flags &= ~IFF_PROMISC;
2770 else
2771 dev->flags |= IFF_PROMISC;
2772 if (dev->flags != old_flags) {
2773 printk(KERN_INFO "device %s %s promiscuous mode\n",
2774 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2775 "left");
2776 if (audit_enabled)
2777 audit_log(current->audit_context, GFP_ATOMIC,
2778 AUDIT_ANOM_PROMISCUOUS,
2779 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2780 dev->name, (dev->flags & IFF_PROMISC),
2781 (old_flags & IFF_PROMISC),
2782 audit_get_loginuid(current),
2783 current->uid, current->gid,
2784 audit_get_sessionid(current));
2785
2786 if (dev->change_rx_flags)
2787 dev->change_rx_flags(dev, IFF_PROMISC);
2788 }
2789 }
2790
2791 /**
2792 * dev_set_promiscuity - update promiscuity count on a device
2793 * @dev: device
2794 * @inc: modifier
2795 *
2796 * Add or remove promiscuity from a device. While the count in the device
2797 * remains above zero the interface remains promiscuous. Once it hits zero
2798 * the device reverts back to normal filtering operation. A negative inc
2799 * value is used to drop promiscuity on the device.
2800 */
2801 void dev_set_promiscuity(struct net_device *dev, int inc)
2802 {
2803 unsigned short old_flags = dev->flags;
2804
2805 __dev_set_promiscuity(dev, inc);
2806 if (dev->flags != old_flags)
2807 dev_set_rx_mode(dev);
2808 }
2809
2810 /**
2811 * dev_set_allmulti - update allmulti count on a device
2812 * @dev: device
2813 * @inc: modifier
2814 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface keeps receiving
 *	all multicast frames. Once it hits zero the device reverts back to normal
2818 * filtering operation. A negative @inc value is used to drop the counter
2819 * when releasing a resource needing all multicasts.
2820 */
2821
2822 void dev_set_allmulti(struct net_device *dev, int inc)
2823 {
2824 unsigned short old_flags = dev->flags;
2825
2826 ASSERT_RTNL();
2827
2828 dev->flags |= IFF_ALLMULTI;
2829 if ((dev->allmulti += inc) == 0)
2830 dev->flags &= ~IFF_ALLMULTI;
2831 if (dev->flags ^ old_flags) {
2832 if (dev->change_rx_flags)
2833 dev->change_rx_flags(dev, IFF_ALLMULTI);
2834 dev_set_rx_mode(dev);
2835 }
2836 }
2837
2838 /*
2839 * Upload unicast and multicast address lists to device and
2840 * configure RX filtering. When the device doesn't support unicast
2841 * filtering it is put in promiscuous mode while unicast addresses
2842 * are present.
2843 */
2844 void __dev_set_rx_mode(struct net_device *dev)
2845 {
2846 /* dev_open will call this function so the list will stay sane. */
2847 if (!(dev->flags&IFF_UP))
2848 return;
2849
2850 if (!netif_device_present(dev))
2851 return;
2852
2853 if (dev->set_rx_mode)
2854 dev->set_rx_mode(dev);
2855 else {
2856 /* Unicast addresses changes may only happen under the rtnl,
2857 * therefore calling __dev_set_promiscuity here is safe.
2858 */
2859 if (dev->uc_count > 0 && !dev->uc_promisc) {
2860 __dev_set_promiscuity(dev, 1);
2861 dev->uc_promisc = 1;
2862 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2863 __dev_set_promiscuity(dev, -1);
2864 dev->uc_promisc = 0;
2865 }
2866
2867 if (dev->set_multicast_list)
2868 dev->set_multicast_list(dev);
2869 }
2870 }
2871
/* Run __dev_set_rx_mode() on @dev with the device's TX lock held (BH off). */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_tx_unlock_bh(dev);
}
2878
2879 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2880 void *addr, int alen, int glbl)
2881 {
2882 struct dev_addr_list *da;
2883
2884 for (; (da = *list) != NULL; list = &da->next) {
2885 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2886 alen == da->da_addrlen) {
2887 if (glbl) {
2888 int old_glbl = da->da_gusers;
2889 da->da_gusers = 0;
2890 if (old_glbl == 0)
2891 break;
2892 }
2893 if (--da->da_users)
2894 return 0;
2895
2896 *list = da->next;
2897 kfree(da);
2898 (*count)--;
2899 return 0;
2900 }
2901 }
2902 return -ENOENT;
2903 }
2904
2905 int __dev_addr_add(struct dev_addr_list **list, int *count,
2906 void *addr, int alen, int glbl)
2907 {
2908 struct dev_addr_list *da;
2909
2910 for (da = *list; da != NULL; da = da->next) {
2911 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2912 da->da_addrlen == alen) {
2913 if (glbl) {
2914 int old_glbl = da->da_gusers;
2915 da->da_gusers = 1;
2916 if (old_glbl)
2917 return 0;
2918 }
2919 da->da_users++;
2920 return 0;
2921 }
2922 }
2923
2924 da = kzalloc(sizeof(*da), GFP_ATOMIC);
2925 if (da == NULL)
2926 return -ENOMEM;
2927 memcpy(da->da_addr, addr, alen);
2928 da->da_addrlen = alen;
2929 da->da_users = 1;
2930 da->da_gusers = glbl ? 1 : 0;
2931 da->next = *list;
2932 *list = da;
2933 (*count)++;
2934 return 0;
2935 }
2936
2937 /**
2938 * dev_unicast_delete - Release secondary unicast address.
2939 * @dev: device
2940 * @addr: address to delete
2941 * @alen: length of @addr
2942 *
2943 * Release reference to a secondary unicast address and remove it
2944 * from the device if the reference count drops to zero.
2945 *
2946 * The caller must hold the rtnl_mutex.
2947 */
2948 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2949 {
2950 int err;
2951
2952 ASSERT_RTNL();
2953
2954 netif_tx_lock_bh(dev);
2955 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2956 if (!err)
2957 __dev_set_rx_mode(dev);
2958 netif_tx_unlock_bh(dev);
2959 return err;
2960 }
2961 EXPORT_SYMBOL(dev_unicast_delete);
2962
2963 /**
2964 * dev_unicast_add - add a secondary unicast address
2965 * @dev: device
 *	@addr: address to add
2967 * @alen: length of @addr
2968 *
2969 * Add a secondary unicast address to the device or increase
2970 * the reference count if it already exists.
2971 *
2972 * The caller must hold the rtnl_mutex.
2973 */
2974 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2975 {
2976 int err;
2977
2978 ASSERT_RTNL();
2979
2980 netif_tx_lock_bh(dev);
2981 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2982 if (!err)
2983 __dev_set_rx_mode(dev);
2984 netif_tx_unlock_bh(dev);
2985 return err;
2986 }
2987 EXPORT_SYMBOL(dev_unicast_add);
2988
2989 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
2990 struct dev_addr_list **from, int *from_count)
2991 {
2992 struct dev_addr_list *da, *next;
2993 int err = 0;
2994
2995 da = *from;
2996 while (da != NULL) {
2997 next = da->next;
2998 if (!da->da_synced) {
2999 err = __dev_addr_add(to, to_count,
3000 da->da_addr, da->da_addrlen, 0);
3001 if (err < 0)
3002 break;
3003 da->da_synced = 1;
3004 da->da_users++;
3005 } else if (da->da_users == 1) {
3006 __dev_addr_delete(to, to_count,
3007 da->da_addr, da->da_addrlen, 0);
3008 __dev_addr_delete(from, from_count,
3009 da->da_addr, da->da_addrlen, 0);
3010 }
3011 da = next;
3012 }
3013 return err;
3014 }
3015
3016 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3017 struct dev_addr_list **from, int *from_count)
3018 {
3019 struct dev_addr_list *da, *next;
3020
3021 da = *from;
3022 while (da != NULL) {
3023 next = da->next;
3024 if (da->da_synced) {
3025 __dev_addr_delete(to, to_count,
3026 da->da_addr, da->da_addrlen, 0);
3027 da->da_synced = 0;
3028 __dev_addr_delete(from, from_count,
3029 da->da_addr, da->da_addrlen, 0);
3030 }
3031 da = next;
3032 }
3033 }
3034
3035 /**
3036 * dev_unicast_sync - Synchronize device's unicast list to another device
3037 * @to: destination device
3038 * @from: source device
3039 *
3040 * Add newly added addresses to the destination device and release
3041 * addresses that have no users left. The source device must be
3042 * locked by netif_tx_lock_bh.
3043 *
3044 * This function is intended to be called from the dev->set_rx_mode
3045 * function of layered software devices.
3046 */
3047 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3048 {
3049 int err = 0;
3050
3051 netif_tx_lock_bh(to);
3052 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3053 &from->uc_list, &from->uc_count);
3054 if (!err)
3055 __dev_set_rx_mode(to);
3056 netif_tx_unlock_bh(to);
3057 return err;
3058 }
3059 EXPORT_SYMBOL(dev_unicast_sync);
3060
3061 /**
3062 * dev_unicast_unsync - Remove synchronized addresses from the destination device
3063 * @to: destination device
3064 * @from: source device
3065 *
3066 * Remove all addresses that were added to the destination device by
3067 * dev_unicast_sync(). This function is intended to be called from the
3068 * dev->stop function of layered software devices.
3069 */
3070 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3071 {
3072 netif_tx_lock_bh(from);
3073 netif_tx_lock_bh(to);
3074
3075 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3076 &from->uc_list, &from->uc_count);
3077 __dev_set_rx_mode(to);
3078
3079 netif_tx_unlock_bh(to);
3080 netif_tx_unlock_bh(from);
3081 }
3082 EXPORT_SYMBOL(dev_unicast_unsync);
3083
3084 static void __dev_addr_discard(struct dev_addr_list **list)
3085 {
3086 struct dev_addr_list *tmp;
3087
3088 while (*list != NULL) {
3089 tmp = *list;
3090 *list = tmp->next;
3091 if (tmp->da_users > tmp->da_gusers)
3092 printk("__dev_addr_discard: address leakage! "
3093 "da_users=%d\n", tmp->da_users);
3094 kfree(tmp);
3095 }
3096 }
3097
3098 static void dev_addr_discard(struct net_device *dev)
3099 {
3100 netif_tx_lock_bh(dev);
3101
3102 __dev_addr_discard(&dev->uc_list);
3103 dev->uc_count = 0;
3104
3105 __dev_addr_discard(&dev->mc_list);
3106 dev->mc_count = 0;
3107
3108 netif_tx_unlock_bh(dev);
3109 }
3110
3111 unsigned dev_get_flags(const struct net_device *dev)
3112 {
3113 unsigned flags;
3114
3115 flags = (dev->flags & ~(IFF_PROMISC |
3116 IFF_ALLMULTI |
3117 IFF_RUNNING |
3118 IFF_LOWER_UP |
3119 IFF_DORMANT)) |
3120 (dev->gflags & (IFF_PROMISC |
3121 IFF_ALLMULTI));
3122
3123 if (netif_running(dev)) {
3124 if (netif_oper_up(dev))
3125 flags |= IFF_RUNNING;
3126 if (netif_carrier_ok(dev))
3127 flags |= IFF_LOWER_UP;
3128 if (netif_dormant(dev))
3129 flags |= IFF_DORMANT;
3130 }
3131
3132 return flags;
3133 }
3134
/*
 * Apply the requested interface flags @flags to @dev.
 *
 * Directly settable bits are copied into dev->flags, IFF_UP changes are
 * turned into dev_open()/dev_close() calls, and IFF_PROMISC/IFF_ALLMULTI
 * are synchronised through dev->gflags and the respective counters.
 * Notifiers and an RTM_NEWLINK message are emitted for what changed.
 * The caller must hold the RTNL.
 *
 * Returns 0, or the error returned by dev_open()/dev_close().
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.
	 */

	/*
	 * Take the first group of bits from @flags; keep UP, the volatile
	 * bits, PROMISC and ALLMULTI as they currently are on the device.
	 */
	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
		dev->change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		/* Was up: close it. Was down: open it. */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	/*
	 * Device is up and some bit other than UP, PROMISC, ALLMULTI or a
	 * volatile one changed: send NETDEV_CHANGE.
	 */
	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		call_netdevice_notifiers(NETDEV_CHANGE, dev);

	/* gflags records the user's PROMISC request; adjust the counter by one. */
	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}
3203
3204 int dev_set_mtu(struct net_device *dev, int new_mtu)
3205 {
3206 int err;
3207
3208 if (new_mtu == dev->mtu)
3209 return 0;
3210
3211 /* MTU must be positive. */
3212 if (new_mtu < 0)
3213 return -EINVAL;
3214
3215 if (!netif_device_present(dev))
3216 return -ENODEV;
3217
3218 err = 0;
3219 if (dev->change_mtu)
3220 err = dev->change_mtu(dev, new_mtu);
3221 else
3222 dev->mtu = new_mtu;
3223 if (!err && dev->flags & IFF_UP)
3224 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3225 return err;
3226 }
3227
3228 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3229 {
3230 int err;
3231
3232 if (!dev->set_mac_address)
3233 return -EOPNOTSUPP;
3234 if (sa->sa_family != dev->type)
3235 return -EINVAL;
3236 if (!netif_device_present(dev))
3237 return -ENODEV;
3238 err = dev->set_mac_address(dev, sa);
3239 if (!err)
3240 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3241 return err;
3242 }
3243
3244 /*
3245 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3246 */
3247 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3248 {
3249 int err;
3250 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3251
3252 if (!dev)
3253 return -ENODEV;
3254
3255 switch (cmd) {
3256 case SIOCGIFFLAGS: /* Get interface flags */
3257 ifr->ifr_flags = dev_get_flags(dev);
3258 return 0;
3259
3260 case SIOCGIFMETRIC: /* Get the metric on the interface
3261 (currently unused) */
3262 ifr->ifr_metric = 0;
3263 return 0;
3264
3265 case SIOCGIFMTU: /* Get the MTU of a device */
3266 ifr->ifr_mtu = dev->mtu;
3267 return 0;
3268
3269 case SIOCGIFHWADDR:
3270 if (!dev->addr_len)
3271 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3272 else
3273 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3274 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3275 ifr->ifr_hwaddr.sa_family = dev->type;
3276 return 0;
3277
3278 case SIOCGIFSLAVE:
3279 err = -EINVAL;
3280 break;
3281
3282 case SIOCGIFMAP:
3283 ifr->ifr_map.mem_start = dev->mem_start;
3284 ifr->ifr_map.mem_end = dev->mem_end;
3285 ifr->ifr_map.base_addr = dev->base_addr;
3286 ifr->ifr_map.irq = dev->irq;
3287 ifr->ifr_map.dma = dev->dma;
3288 ifr->ifr_map.port = dev->if_port;
3289 return 0;
3290
3291 case SIOCGIFINDEX:
3292 ifr->ifr_ifindex = dev->ifindex;
3293 return 0;
3294
3295 case SIOCGIFTXQLEN:
3296 ifr->ifr_qlen = dev->tx_queue_len;
3297 return 0;
3298
3299 default:
3300 /* dev_ioctl() should ensure this case
3301 * is never reached
3302 */
3303 WARN_ON(1);
3304 err = -EINVAL;
3305 break;
3306
3307 }
3308 return err;
3309 }
3310
3311 /*
3312 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3313 */
3314 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3315 {
3316 int err;
3317 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3318
3319 if (!dev)
3320 return -ENODEV;
3321
3322 switch (cmd) {
3323 case SIOCSIFFLAGS: /* Set interface flags */
3324 return dev_change_flags(dev, ifr->ifr_flags);
3325
3326 case SIOCSIFMETRIC: /* Set the metric on the interface
3327 (currently unused) */
3328 return -EOPNOTSUPP;
3329
3330 case SIOCSIFMTU: /* Set the MTU of a device */
3331 return dev_set_mtu(dev, ifr->ifr_mtu);
3332
3333 case SIOCSIFHWADDR:
3334 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3335
3336 case SIOCSIFHWBROADCAST:
3337 if (ifr->ifr_hwaddr.sa_family != dev->type)
3338 return -EINVAL;
3339 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3340 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3341 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3342 return 0;
3343
3344 case SIOCSIFMAP:
3345 if (dev->set_config) {
3346 if (!netif_device_present(dev))
3347 return -ENODEV;
3348 return dev->set_config(dev, &ifr->ifr_map);
3349 }
3350 return -EOPNOTSUPP;
3351
3352 case SIOCADDMULTI:
3353 if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3354 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3355 return -EINVAL;
3356 if (!netif_device_present(dev))
3357 return -ENODEV;
3358 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3359 dev->addr_len, 1);
3360
3361 case SIOCDELMULTI:
3362 if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3363 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3364 return -EINVAL;
3365 if (!netif_device_present(dev))
3366 return -ENODEV;
3367 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3368 dev->addr_len, 1);
3369
3370 case SIOCSIFTXQLEN:
3371 if (ifr->ifr_qlen < 0)
3372 return -EINVAL;
3373 dev->tx_queue_len = ifr->ifr_qlen;
3374 return 0;
3375
3376 case SIOCSIFNAME:
3377 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3378 return dev_change_name(dev, ifr->ifr_newname);
3379
3380 /*
3381 * Unknown or private ioctl
3382 */
3383
3384 default:
3385 if ((cmd >= SIOCDEVPRIVATE &&
3386 cmd <= SIOCDEVPRIVATE + 15) ||
3387 cmd == SIOCBONDENSLAVE ||
3388 cmd == SIOCBONDRELEASE ||
3389 cmd == SIOCBONDSETHWADDR ||
3390 cmd == SIOCBONDSLAVEINFOQUERY ||
3391 cmd == SIOCBONDINFOQUERY ||
3392 cmd == SIOCBONDCHANGEACTIVE ||
3393 cmd == SIOCGMIIPHY ||
3394 cmd == SIOCGMIIREG ||
3395 cmd == SIOCSMIIREG ||
3396 cmd == SIOCBRADDIF ||
3397 cmd == SIOCBRDELIF ||
3398 cmd == SIOCWANDEV) {
3399 err = -EOPNOTSUPP;
3400 if (dev->do_ioctl) {
3401 if (netif_device_present(dev))
3402 err = dev->do_ioctl(dev, ifr,
3403 cmd);
3404 else
3405 err = -ENODEV;
3406 }
3407 } else
3408 err = -EINVAL;
3409
3410 }
3411 return err;
3412 }
3413
3414 /*
3415 * This function handles all "interface"-type I/O control requests. The actual
3416 * 'doing' part of this is dev_ifsioc above.
3417 */
3418
3419 /**
3420 * dev_ioctl - network device ioctl
3421 * @net: the applicable net namespace
3422 * @cmd: command to issue
3423 * @arg: pointer to a struct ifreq in user space
3424 *
3425 * Issue ioctl functions to devices. This is normally called by the
3426 * user space syscall interfaces but can sometimes be useful for
3427 * other purposes. The return value is the return from the syscall if
3428 * positive or a negative errno code on error.
3429 */
3430
3431 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3432 {
3433 struct ifreq ifr;
3434 int ret;
3435 char *colon;
3436
3437 /* One special case: SIOCGIFCONF takes ifconf argument
3438 and requires shared lock, because it sleeps writing
3439 to user space.
3440 */
3441
3442 if (cmd == SIOCGIFCONF) {
3443 rtnl_lock();
3444 ret = dev_ifconf(net, (char __user *) arg);
3445 rtnl_unlock();
3446 return ret;
3447 }
3448 if (cmd == SIOCGIFNAME)
3449 return dev_ifname(net, (struct ifreq __user *)arg);
3450
3451 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3452 return -EFAULT;
3453
3454 ifr.ifr_name[IFNAMSIZ-1] = 0;
3455
3456 colon = strchr(ifr.ifr_name, ':');
3457 if (colon)
3458 *colon = 0;
3459
3460 /*
3461 * See which interface the caller is talking about.
3462 */
3463
3464 switch (cmd) {
3465 /*
3466 * These ioctl calls:
3467 * - can be done by all.
3468 * - atomic and do not require locking.
3469 * - return a value
3470 */
3471 case SIOCGIFFLAGS:
3472 case SIOCGIFMETRIC:
3473 case SIOCGIFMTU:
3474 case SIOCGIFHWADDR:
3475 case SIOCGIFSLAVE:
3476 case SIOCGIFMAP:
3477 case SIOCGIFINDEX:
3478 case SIOCGIFTXQLEN:
3479 dev_load(net, ifr.ifr_name);
3480 read_lock(&dev_base_lock);
3481 ret = dev_ifsioc_locked(net, &ifr, cmd);
3482 read_unlock(&dev_base_lock);
3483 if (!ret) {
3484 if (colon)
3485 *colon = ':';
3486 if (copy_to_user(arg, &ifr,
3487 sizeof(struct ifreq)))
3488 ret = -EFAULT;
3489 }
3490 return ret;
3491
3492 case SIOCETHTOOL:
3493 dev_load(net, ifr.ifr_name);
3494 rtnl_lock();
3495 ret = dev_ethtool(net, &ifr);
3496 rtnl_unlock();
3497 if (!ret) {
3498 if (colon)
3499 *colon = ':';
3500 if (copy_to_user(arg, &ifr,
3501 sizeof(struct ifreq)))
3502 ret = -EFAULT;
3503 }
3504 return ret;
3505
3506 /*
3507 * These ioctl calls:
3508 * - require superuser power.
3509 * - require strict serialization.
3510 * - return a value
3511 */
3512 case SIOCGMIIPHY:
3513 case SIOCGMIIREG:
3514 case SIOCSIFNAME:
3515 if (!capable(CAP_NET_ADMIN))
3516 return -EPERM;
3517 dev_load(net, ifr.ifr_name);
3518 rtnl_lock();
3519 ret = dev_ifsioc(net, &ifr, cmd);
3520 rtnl_unlock();
3521 if (!ret) {
3522 if (colon)
3523 *colon = ':';
3524 if (copy_to_user(arg, &ifr,
3525 sizeof(struct ifreq)))
3526 ret = -EFAULT;
3527 }
3528 return ret;
3529
3530 /*
3531 * These ioctl calls:
3532 * - require superuser power.
3533 * - require strict serialization.
3534 * - do not return a value
3535 */
3536 case SIOCSIFFLAGS:
3537 case SIOCSIFMETRIC:
3538 case SIOCSIFMTU:
3539 case SIOCSIFMAP:
3540 case SIOCSIFHWADDR:
3541 case SIOCSIFSLAVE:
3542 case SIOCADDMULTI:
3543 case SIOCDELMULTI:
3544 case SIOCSIFHWBROADCAST:
3545 case SIOCSIFTXQLEN:
3546 case SIOCSMIIREG:
3547 case SIOCBONDENSLAVE:
3548 case SIOCBONDRELEASE:
3549 case SIOCBONDSETHWADDR:
3550 case SIOCBONDCHANGEACTIVE:
3551 case SIOCBRADDIF:
3552 case SIOCBRDELIF:
3553 if (!capable(CAP_NET_ADMIN))
3554 return -EPERM;
3555 /* fall through */
3556 case SIOCBONDSLAVEINFOQUERY:
3557 case SIOCBONDINFOQUERY:
3558 dev_load(net, ifr.ifr_name);
3559 rtnl_lock();
3560 ret = dev_ifsioc(net, &ifr, cmd);
3561 rtnl_unlock();
3562 return ret;
3563
3564 case SIOCGIFMEM:
3565 /* Get the per device memory space. We can add this but
3566 * currently do not support it */
3567 case SIOCSIFMEM:
3568 /* Set the per device memory buffer space.
3569 * Not applicable in our case */
3570 case SIOCSIFLINK:
3571 return -EINVAL;
3572
3573 /*
3574 * Unknown or private ioctl.
3575 */
3576 default:
3577 if (cmd == SIOCWANDEV ||
3578 (cmd >= SIOCDEVPRIVATE &&
3579 cmd <= SIOCDEVPRIVATE + 15)) {
3580 dev_load(net, ifr.ifr_name);
3581 rtnl_lock();
3582 ret = dev_ifsioc(net, &ifr, cmd);
3583 rtnl_unlock();
3584 if (!ret && copy_to_user(arg, &ifr,
3585 sizeof(struct ifreq)))
3586 ret = -EFAULT;
3587 return ret;
3588 }
3589 /* Take care of Wireless Extensions */
3590 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3591 return wext_handle_ioctl(net, &ifr, cmd, arg);
3592 return -EINVAL;
3593 }
3594 }
3595
3596
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates come from a function-static counter that is advanced
 *	on every attempt and reset to 1 whenever it becomes non-positive;
 *	the first candidate that __dev_get_by_index() does not find in
 *	@net is returned.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	do {
		++ifindex;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(net, ifindex));

	return ifindex;
}
3615
3616 /* Delayed registration/unregisteration */
3617 static DEFINE_SPINLOCK(net_todo_list_lock);
3618 static LIST_HEAD(net_todo_list);
3619
3620 static void net_set_todo(struct net_device *dev)
3621 {
3622 spin_lock(&net_todo_list_lock);
3623 list_add_tail(&dev->todo_list, &net_todo_list);
3624 spin_unlock(&net_todo_list_lock);
3625 }
3626
3627 static void rollback_registered(struct net_device *dev)
3628 {
3629 BUG_ON(dev_boot_phase);
3630 ASSERT_RTNL();
3631
3632 /* Some devices call without registering for initialization unwind. */
3633 if (dev->reg_state == NETREG_UNINITIALIZED) {
3634 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3635 "was registered\n", dev->name, dev);
3636
3637 WARN_ON(1);
3638 return;
3639 }
3640
3641 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3642
3643 /* If device is running, close it first. */
3644 dev_close(dev);
3645
3646 /* And unlink it from device chain. */
3647 unlist_netdevice(dev);
3648
3649 dev->reg_state = NETREG_UNREGISTERING;
3650
3651 synchronize_net();
3652
3653 /* Shutdown queueing discipline. */
3654 dev_shutdown(dev);
3655
3656
3657 /* Notify protocols, that we are about to destroy
3658 this device. They should clean all the things.
3659 */
3660 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3661
3662 /*
3663 * Flush the unicast and multicast chains
3664 */
3665 dev_addr_discard(dev);
3666
3667 if (dev->uninit)
3668 dev->uninit(dev);
3669
3670 /* Notifier chain MUST detach us from master device. */
3671 BUG_TRAP(!dev->master);
3672
3673 /* Remove entries from kobject tree */
3674 netdev_unregister_kobject(dev);
3675
3676 synchronize_net();
3677
3678 dev_put(dev);
3679 }
3680
3681 /**
3682 * register_netdevice - register a network device
3683 * @dev: device to register
3684 *
3685 * Take a completed network device structure and add it to the kernel
3686 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3687 * chain. 0 is returned on success. A negative errno code is returned
3688 * on a failure to set up the device, or if the name is a duplicate.
3689 *
3690 * Callers must hold the rtnl semaphore. You may want
3691 * register_netdev() instead of this.
3692 *
3693 * BUGS:
3694 * The locking appears insufficient to guarantee two parallel registers
3695 * will not get the same name.
3696 */
3697
3698 int register_netdevice(struct net_device *dev)
3699 {
3700 struct hlist_head *head;
3701 struct hlist_node *p;
3702 int ret;
3703 struct net *net;
3704
3705 BUG_ON(dev_boot_phase);
3706 ASSERT_RTNL();
3707
3708 might_sleep();
3709
3710 /* When net_device's are persistent, this will be fatal. */
3711 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3712 BUG_ON(!dev->nd_net);
3713 net = dev->nd_net;
3714
3715 spin_lock_init(&dev->queue_lock);
3716 spin_lock_init(&dev->_xmit_lock);
3717 netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3718 dev->xmit_lock_owner = (void *)-1;
3719 spin_lock_init(&dev->ingress_lock);
3720
3721 dev->iflink = -1;
3722
3723 /* Init, if this function is available */
3724 if (dev->init) {
3725 ret = dev->init(dev);
3726 if (ret) {
3727 if (ret > 0)
3728 ret = -EIO;
3729 goto out;
3730 }
3731 }
3732
3733 if (!dev_valid_name(dev->name)) {
3734 ret = -EINVAL;
3735 goto err_uninit;
3736 }
3737
3738 dev->ifindex = dev_new_index(net);
3739 if (dev->iflink == -1)
3740 dev->iflink = dev->ifindex;
3741
3742 /* Check for existence of name */
3743 head = dev_name_hash(net, dev->name);
3744 hlist_for_each(p, head) {
3745 struct net_device *d
3746 = hlist_entry(p, struct net_device, name_hlist);
3747 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3748 ret = -EEXIST;
3749 goto err_uninit;
3750 }
3751 }
3752
3753 /* Fix illegal checksum combinations */
3754 if ((dev->features & NETIF_F_HW_CSUM) &&
3755 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3756 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3757 dev->name);
3758 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3759 }
3760
3761 if ((dev->features & NETIF_F_NO_CSUM) &&
3762 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3763 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3764 dev->name);
3765 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3766 }
3767
3768
3769 /* Fix illegal SG+CSUM combinations. */
3770 if ((dev->features & NETIF_F_SG) &&
3771 !(dev->features & NETIF_F_ALL_CSUM)) {
3772 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3773 dev->name);
3774 dev->features &= ~NETIF_F_SG;
3775 }
3776
3777 /* TSO requires that SG is present as well. */
3778 if ((dev->features & NETIF_F_TSO) &&
3779 !(dev->features & NETIF_F_SG)) {
3780 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3781 dev->name);
3782 dev->features &= ~NETIF_F_TSO;
3783 }
3784 if (dev->features & NETIF_F_UFO) {
3785 if (!(dev->features & NETIF_F_HW_CSUM)) {
3786 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3787 "NETIF_F_HW_CSUM feature.\n",
3788 dev->name);
3789 dev->features &= ~NETIF_F_UFO;
3790 }
3791 if (!(dev->features & NETIF_F_SG)) {
3792 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3793 "NETIF_F_SG feature.\n",
3794 dev->name);
3795 dev->features &= ~NETIF_F_UFO;
3796 }
3797 }
3798
3799 ret = netdev_register_kobject(dev);
3800 if (ret)
3801 goto err_uninit;
3802 dev->reg_state = NETREG_REGISTERED;
3803
3804 /*
3805 * Default initial state at registry is that the
3806 * device is present.
3807 */
3808
3809 set_bit(__LINK_STATE_PRESENT, &dev->state);
3810
3811 dev_init_scheduler(dev);
3812 dev_hold(dev);
3813 list_netdevice(dev);
3814
3815 /* Notify protocols, that a new device appeared. */
3816 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
3817 ret = notifier_to_errno(ret);
3818 if (ret) {
3819 rollback_registered(dev);
3820 dev->reg_state = NETREG_UNREGISTERED;
3821 }
3822
3823 out:
3824 return ret;
3825
3826 err_uninit:
3827 if (dev->uninit)
3828 dev->uninit(dev);
3829 goto out;
3830 }
3831
3832 /**
3833 * register_netdev - register a network device
3834 * @dev: device to register
3835 *
3836 * Take a completed network device structure and add it to the kernel
3837 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3838 * chain. 0 is returned on success. A negative errno code is returned
3839 * on a failure to set up the device, or if the name is a duplicate.
3840 *
3841 * This is a wrapper around register_netdevice that takes the rtnl semaphore
3842 * and expands the device name if you passed a format string to
3843 * alloc_netdev.
3844 */
3845 int register_netdev(struct net_device *dev)
3846 {
3847 int err;
3848
3849 rtnl_lock();
3850
3851 /*
3852 * If the name is a format string the caller wants us to do a
3853 * name allocation.
3854 */
3855 if (strchr(dev->name, '%')) {
3856 err = dev_alloc_name(dev, dev->name);
3857 if (err < 0)
3858 goto out;
3859 }
3860
3861 err = register_netdevice(dev);
3862 out:
3863 rtnl_unlock();
3864 return err;
3865 }
3866 EXPORT_SYMBOL(register_netdev);
3867
3868 /*
3869 * netdev_wait_allrefs - wait until all references are gone.
3870 *
3871 * This is called when unregistering network devices.
3872 *
3873 * Any protocol or device that holds a reference should register
3874 * for netdevice notification, and cleanup and put back the
3875 * reference if they receive an UNREGISTER event.
3876 * We can get stuck here if buggy protocols don't correctly
3877 * call dev_put.
3878 */
3879 static void netdev_wait_allrefs(struct net_device *dev)
3880 {
3881 unsigned long rebroadcast_time, warning_time;
3882
3883 rebroadcast_time = warning_time = jiffies;
3884 while (atomic_read(&dev->refcnt) != 0) {
3885 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3886 rtnl_lock();
3887
3888 /* Rebroadcast unregister notification */
3889 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3890
3891 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3892 &dev->state)) {
3893 /* We must not have linkwatch events
3894 * pending on unregister. If this
3895 * happens, we simply run the queue
3896 * unscheduled, resulting in a noop
3897 * for this device.
3898 */
3899 linkwatch_run_queue();
3900 }
3901
3902 __rtnl_unlock();
3903
3904 rebroadcast_time = jiffies;
3905 }
3906
3907 msleep(250);
3908
3909 if (time_after(jiffies, warning_time + 10 * HZ)) {
3910 printk(KERN_EMERG "unregister_netdevice: "
3911 "waiting for %s to become free. Usage "
3912 "count = %d\n",
3913 dev->name, atomic_read(&dev->refcnt));
3914 warning_time = jiffies;
3915 }
3916 }
3917 }
3918
3919 /* The sequence is:
3920 *
3921 * rtnl_lock();
3922 * ...
3923 * register_netdevice(x1);
3924 * register_netdevice(x2);
3925 * ...
3926 * unregister_netdevice(y1);
3927 * unregister_netdevice(y2);
3928 * ...
3929 * rtnl_unlock();
3930 * free_netdev(y1);
3931 * free_netdev(y2);
3932 *
3933 * We are invoked by rtnl_unlock() after it drops the semaphore.
3934 * This allows us to deal with problems:
3935 * 1) We can delete sysfs objects which invoke hotplug
3936 * without deadlocking with linkwatch via keventd.
3937 * 2) Since we run with the RTNL semaphore not held, we can sleep
3938 * safely in order to wait for the netdev refcnt to drop to zero.
3939 */
3940 static DEFINE_MUTEX(net_todo_run_mutex);
3941 void netdev_run_todo(void)
3942 {
3943 struct list_head list;
3944
3945 /* Need to guard against multiple cpu's getting out of order. */
3946 mutex_lock(&net_todo_run_mutex);
3947
3948 /* Not safe to do outside the semaphore. We must not return
3949 * until all unregister events invoked by the local processor
3950 * have been completed (either by this todo run, or one on
3951 * another cpu).
3952 */
3953 if (list_empty(&net_todo_list))
3954 goto out;
3955
3956 /* Snapshot list, allow later requests */
3957 spin_lock(&net_todo_list_lock);
3958 list_replace_init(&net_todo_list, &list);
3959 spin_unlock(&net_todo_list_lock);
3960
3961 while (!list_empty(&list)) {
3962 struct net_device *dev
3963 = list_entry(list.next, struct net_device, todo_list);
3964 list_del(&dev->todo_list);
3965
3966 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3967 printk(KERN_ERR "network todo '%s' but state %d\n",
3968 dev->name, dev->reg_state);
3969 dump_stack();
3970 continue;
3971 }
3972
3973 dev->reg_state = NETREG_UNREGISTERED;
3974
3975 netdev_wait_allrefs(dev);
3976
3977 /* paranoia */
3978 BUG_ON(atomic_read(&dev->refcnt));
3979 BUG_TRAP(!dev->ip_ptr);
3980 BUG_TRAP(!dev->ip6_ptr);
3981 BUG_TRAP(!dev->dn_ptr);
3982
3983 if (dev->destructor)
3984 dev->destructor(dev);
3985
3986 /* Free network device */
3987 kobject_put(&dev->dev.kobj);
3988 }
3989
3990 out:
3991 mutex_unlock(&net_todo_run_mutex);
3992 }
3993
3994 static struct net_device_stats *internal_stats(struct net_device *dev)
3995 {
3996 return &dev->stats;
3997 }
3998
3999 /**
4000 * alloc_netdev_mq - allocate network device
4001 * @sizeof_priv: size of private data to allocate space for
4002 * @name: device name format string
4003 * @setup: callback to initialize device
4004 * @queue_count: the number of subqueues to allocate
4005 *
4006 * Allocates a struct net_device with private data area for driver use
4007 * and performs basic initialization. Also allocates subquue structs
4008 * for each queue on the device at the end of the netdevice.
4009 */
4010 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4011 void (*setup)(struct net_device *), unsigned int queue_count)
4012 {
4013 void *p;
4014 struct net_device *dev;
4015 int alloc_size;
4016
4017 BUG_ON(strlen(name) >= sizeof(dev->name));
4018
4019 /* ensure 32-byte alignment of both the device and private area */
4020 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
4021 (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
4022 ~NETDEV_ALIGN_CONST;
4023 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
4024
4025 p = kzalloc(alloc_size, GFP_KERNEL);
4026 if (!p) {
4027 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4028 return NULL;
4029 }
4030
4031 dev = (struct net_device *)
4032 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4033 dev->padded = (char *)dev - (char *)p;
4034 dev->nd_net = &init_net;
4035
4036 if (sizeof_priv) {
4037 dev->priv = ((char *)dev +
4038 ((sizeof(struct net_device) +
4039 (sizeof(struct net_device_subqueue) *
4040 (queue_count - 1)) + NETDEV_ALIGN_CONST)
4041 & ~NETDEV_ALIGN_CONST));
4042 }
4043
4044 dev->egress_subqueue_count = queue_count;
4045
4046 dev->get_stats = internal_stats;
4047 netpoll_netdev_init(dev);
4048 setup(dev);
4049 strcpy(dev->name, name);
4050 return dev;
4051 }
4052 EXPORT_SYMBOL(alloc_netdev_mq);
4053
4054 /**
4055 * free_netdev - free network device
4056 * @dev: device
4057 *
4058 * This function does the last stage of destroying an allocated device
4059 * interface. The reference to the device object is released.
4060 * If this is the last reference then it will be freed.
4061 */
4062 void free_netdev(struct net_device *dev)
4063 {
4064 /* Compatibility with error handling in drivers */
4065 if (dev->reg_state == NETREG_UNINITIALIZED) {
4066 kfree((char *)dev - dev->padded);
4067 return;
4068 }
4069
4070 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4071 dev->reg_state = NETREG_RELEASED;
4072
4073 /* will free via device release */
4074 put_device(&dev->dev);
4075 }
4076
/*
 *	Synchronize with packet receive processing.
 *
 *	May sleep; implemented as a call to synchronize_rcu().
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
4083
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);

	/* Queue the device on the todo list; the rest of the
	 * unregistration is finished after the rtnl lock is dropped.
	 */
	net_set_todo(dev);
}
4103
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
4123
4124 /**
4125 * dev_change_net_namespace - move device to different nethost namespace
4126 * @dev: device
4127 * @net: network namespace
4128 * @pat: If not NULL name pattern to try if the current device name
4129 * is already taken in the destination network namespace.
4130 *
4131 * This function shuts down a device interface and moves it
4132 * to a new network namespace. On success 0 is returned, on
4133 * a failure a netagive errno code is returned.
4134 *
4135 * Callers must hold the rtnl semaphore.
4136 */
4137
4138 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4139 {
4140 char buf[IFNAMSIZ];
4141 const char *destname;
4142 int err;
4143
4144 ASSERT_RTNL();
4145
4146 /* Don't allow namespace local devices to be moved. */
4147 err = -EINVAL;
4148 if (dev->features & NETIF_F_NETNS_LOCAL)
4149 goto out;
4150
4151 /* Ensure the device has been registrered */
4152 err = -EINVAL;
4153 if (dev->reg_state != NETREG_REGISTERED)
4154 goto out;
4155
4156 /* Get out if there is nothing todo */
4157 err = 0;
4158 if (dev->nd_net == net)
4159 goto out;
4160
4161 /* Pick the destination device name, and ensure
4162 * we can use it in the destination network namespace.
4163 */
4164 err = -EEXIST;
4165 destname = dev->name;
4166 if (__dev_get_by_name(net, destname)) {
4167 /* We get here if we can't use the current device name */
4168 if (!pat)
4169 goto out;
4170 if (!dev_valid_name(pat))
4171 goto out;
4172 if (strchr(pat, '%')) {
4173 if (__dev_alloc_name(net, pat, buf) < 0)
4174 goto out;
4175 destname = buf;
4176 } else
4177 destname = pat;
4178 if (__dev_get_by_name(net, destname))
4179 goto out;
4180 }
4181
4182 /*
4183 * And now a mini version of register_netdevice unregister_netdevice.
4184 */
4185
4186 /* If device is running close it first. */
4187 dev_close(dev);
4188
4189 /* And unlink it from device chain */
4190 err = -ENODEV;
4191 unlist_netdevice(dev);
4192
4193 synchronize_net();
4194
4195 /* Shutdown queueing discipline. */
4196 dev_shutdown(dev);
4197
4198 /* Notify protocols, that we are about to destroy
4199 this device. They should clean all the things.
4200 */
4201 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4202
4203 /*
4204 * Flush the unicast and multicast chains
4205 */
4206 dev_addr_discard(dev);
4207
4208 /* Actually switch the network namespace */
4209 dev->nd_net = net;
4210
4211 /* Assign the new device name */
4212 if (destname != dev->name)
4213 strcpy(dev->name, destname);
4214
4215 /* If there is an ifindex conflict assign a new one */
4216 if (__dev_get_by_index(net, dev->ifindex)) {
4217 int iflink = (dev->iflink == dev->ifindex);
4218 dev->ifindex = dev_new_index(net);
4219 if (iflink)
4220 dev->iflink = dev->ifindex;
4221 }
4222
4223 /* Fixup kobjects */
4224 err = device_rename(&dev->dev, dev->name);
4225 WARN_ON(err);
4226
4227 /* Add the device back in the hashes */
4228 list_netdevice(dev);
4229
4230 /* Notify protocols, that a new device appeared. */
4231 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4232
4233 synchronize_net();
4234 err = 0;
4235 out:
4236 return err;
4237 }
4238
4239 static int dev_cpu_callback(struct notifier_block *nfb,
4240 unsigned long action,
4241 void *ocpu)
4242 {
4243 struct sk_buff **list_skb;
4244 struct net_device **list_net;
4245 struct sk_buff *skb;
4246 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4247 struct softnet_data *sd, *oldsd;
4248
4249 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4250 return NOTIFY_OK;
4251
4252 local_irq_disable();
4253 cpu = smp_processor_id();
4254 sd = &per_cpu(softnet_data, cpu);
4255 oldsd = &per_cpu(softnet_data, oldcpu);
4256
4257 /* Find end of our completion_queue. */
4258 list_skb = &sd->completion_queue;
4259 while (*list_skb)
4260 list_skb = &(*list_skb)->next;
4261 /* Append completion queue from offline CPU. */
4262 *list_skb = oldsd->completion_queue;
4263 oldsd->completion_queue = NULL;
4264
4265 /* Find end of our output_queue. */
4266 list_net = &sd->output_queue;
4267 while (*list_net)
4268 list_net = &(*list_net)->next_sched;
4269 /* Append output queue from offline CPU. */
4270 *list_net = oldsd->output_queue;
4271 oldsd->output_queue = NULL;
4272
4273 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4274 local_irq_enable();
4275
4276 /* Process offline CPU's input_pkt_queue */
4277 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4278 netif_rx(skb);
4279
4280 return NOTIFY_OK;
4281 }
4282
4283 #ifdef CONFIG_NET_DMA
4284 /**
4285 * net_dma_rebalance - try to maintain one DMA channel per CPU
4286 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4287 *
4288 * This is called when the number of channels allocated to the net_dma client
4289 * changes. The net_dma client tries to have one DMA channel per CPU.
4290 */
4291
4292 static void net_dma_rebalance(struct net_dma *net_dma)
4293 {
4294 unsigned int cpu, i, n, chan_idx;
4295 struct dma_chan *chan;
4296
4297 if (cpus_empty(net_dma->channel_mask)) {
4298 for_each_online_cpu(cpu)
4299 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4300 return;
4301 }
4302
4303 i = 0;
4304 cpu = first_cpu(cpu_online_map);
4305
4306 for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4307 chan = net_dma->channels[chan_idx];
4308
4309 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4310 + (i < (num_online_cpus() %
4311 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4312
4313 while(n) {
4314 per_cpu(softnet_data, cpu).net_dma = chan;
4315 cpu = next_cpu(cpu, cpu_online_map);
4316 n--;
4317 }
4318 i++;
4319 }
4320 }
4321
4322 /**
4323 * netdev_dma_event - event callback for the net_dma_client
4324 * @client: should always be net_dma_client
4325 * @chan: DMA channel for the event
4326 * @state: DMA state to be handled
4327 */
4328 static enum dma_state_client
4329 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4330 enum dma_state state)
4331 {
4332 int i, found = 0, pos = -1;
4333 struct net_dma *net_dma =
4334 container_of(client, struct net_dma, client);
4335 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4336
4337 spin_lock(&net_dma->lock);
4338 switch (state) {
4339 case DMA_RESOURCE_AVAILABLE:
4340 for (i = 0; i < NR_CPUS; i++)
4341 if (net_dma->channels[i] == chan) {
4342 found = 1;
4343 break;
4344 } else if (net_dma->channels[i] == NULL && pos < 0)
4345 pos = i;
4346
4347 if (!found && pos >= 0) {
4348 ack = DMA_ACK;
4349 net_dma->channels[pos] = chan;
4350 cpu_set(pos, net_dma->channel_mask);
4351 net_dma_rebalance(net_dma);
4352 }
4353 break;
4354 case DMA_RESOURCE_REMOVED:
4355 for (i = 0; i < NR_CPUS; i++)
4356 if (net_dma->channels[i] == chan) {
4357 found = 1;
4358 pos = i;
4359 break;
4360 }
4361
4362 if (found) {
4363 ack = DMA_ACK;
4364 cpu_clear(pos, net_dma->channel_mask);
4365 net_dma->channels[i] = NULL;
4366 net_dma_rebalance(net_dma);
4367 }
4368 break;
4369 default:
4370 break;
4371 }
4372 spin_unlock(&net_dma->lock);
4373
4374 return ack;
4375 }
4376
4377 /**
4378 * netdev_dma_regiser - register the networking subsystem as a DMA client
4379 */
4380 static int __init netdev_dma_register(void)
4381 {
4382 spin_lock_init(&net_dma.lock);
4383 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4384 dma_async_client_register(&net_dma.client);
4385 dma_async_client_chan_request(&net_dma.client);
4386 return 0;
4387 }
4388
4389 #else
4390 static int __init netdev_dma_register(void) { return -ENODEV; }
4391 #endif /* CONFIG_NET_DMA */
4392
4393 /**
4394 * netdev_compute_feature - compute conjunction of two feature sets
4395 * @all: first feature set
4396 * @one: second feature set
4397 *
4398 * Computes a new feature set after adding a device with feature set
4399 * @one to the master device with current feature set @all. Returns
4400 * the new feature set.
4401 */
4402 int netdev_compute_features(unsigned long all, unsigned long one)
4403 {
4404 /* if device needs checksumming, downgrade to hw checksumming */
4405 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4406 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4407
4408 /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4409 if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4410 all ^= NETIF_F_HW_CSUM
4411 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4412
4413 if (one & NETIF_F_GSO)
4414 one |= NETIF_F_GSO_SOFTWARE;
4415 one |= NETIF_F_GSO;
4416
4417 /* If even one device supports robust GSO, enable it for all. */
4418 if (one & NETIF_F_GSO_ROBUST)
4419 all |= NETIF_F_GSO_ROBUST;
4420
4421 all &= one | NETIF_F_LLTX;
4422
4423 if (!(all & NETIF_F_ALL_CSUM))
4424 all &= ~NETIF_F_SG;
4425 if (!(all & NETIF_F_SG))
4426 all &= ~NETIF_F_GSO_MASK;
4427
4428 return all;
4429 }
4430 EXPORT_SYMBOL(netdev_compute_features);
4431
4432 static struct hlist_head *netdev_create_hash(void)
4433 {
4434 int i;
4435 struct hlist_head *hash;
4436
4437 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4438 if (hash != NULL)
4439 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4440 INIT_HLIST_HEAD(&hash[i]);
4441
4442 return hash;
4443 }
4444
4445 /* Initialize per network namespace state */
4446 static int __net_init netdev_init(struct net *net)
4447 {
4448 INIT_LIST_HEAD(&net->dev_base_head);
4449
4450 net->dev_name_head = netdev_create_hash();
4451 if (net->dev_name_head == NULL)
4452 goto err_name;
4453
4454 net->dev_index_head = netdev_create_hash();
4455 if (net->dev_index_head == NULL)
4456 goto err_idx;
4457
4458 return 0;
4459
4460 err_idx:
4461 kfree(net->dev_name_head);
4462 err_name:
4463 return -ENOMEM;
4464 }
4465
4466 static void __net_exit netdev_exit(struct net *net)
4467 {
4468 kfree(net->dev_name_head);
4469 kfree(net->dev_index_head);
4470 }
4471
4472 static struct pernet_operations __net_initdata netdev_net_ops = {
4473 .init = netdev_init,
4474 .exit = netdev_exit,
4475 };
4476
4477 static void __net_exit default_device_exit(struct net *net)
4478 {
4479 struct net_device *dev, *next;
4480 /*
4481 * Push all migratable of the network devices back to the
4482 * initial network namespace
4483 */
4484 rtnl_lock();
4485 for_each_netdev_safe(net, dev, next) {
4486 int err;
4487
4488 /* Ignore unmoveable devices (i.e. loopback) */
4489 if (dev->features & NETIF_F_NETNS_LOCAL)
4490 continue;
4491
4492 /* Push remaing network devices to init_net */
4493 err = dev_change_net_namespace(dev, &init_net, "dev%d");
4494 if (err) {
4495 printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4496 __func__, dev->name, err);
4497 unregister_netdevice(dev);
4498 }
4499 }
4500 rtnl_unlock();
4501 }
4502
4503 static struct pernet_operations __net_initdata default_device_ops = {
4504 .exit = default_device_exit,
4505 };
4506
4507 /*
4508 * Initialize the DEV module. At boot time this walks the device list and
4509 * unhooks any devices that fail to initialise (normally hardware not
4510 * present) and leaves us with a valid list of present and active devices.
4511 *
4512 */
4513
4514 /*
4515 * This is called single threaded during boot, so no need
4516 * to take the rtnl semaphore.
4517 */
4518 static int __init net_dev_init(void)
4519 {
4520 int i, rc = -ENOMEM;
4521
4522 BUG_ON(!dev_boot_phase);
4523
4524 if (dev_proc_init())
4525 goto out;
4526
4527 if (netdev_kobject_init())
4528 goto out;
4529
4530 INIT_LIST_HEAD(&ptype_all);
4531 for (i = 0; i < PTYPE_HASH_SIZE; i++)
4532 INIT_LIST_HEAD(&ptype_base[i]);
4533
4534 if (register_pernet_subsys(&netdev_net_ops))
4535 goto out;
4536
4537 if (register_pernet_device(&default_device_ops))
4538 goto out;
4539
4540 /*
4541 * Initialise the packet receive queues.
4542 */
4543
4544 for_each_possible_cpu(i) {
4545 struct softnet_data *queue;
4546
4547 queue = &per_cpu(softnet_data, i);
4548 skb_queue_head_init(&queue->input_pkt_queue);
4549 queue->completion_queue = NULL;
4550 INIT_LIST_HEAD(&queue->poll_list);
4551
4552 queue->backlog.poll = process_backlog;
4553 queue->backlog.weight = weight_p;
4554 }
4555
4556 netdev_dma_register();
4557
4558 dev_boot_phase = 0;
4559
4560 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4561 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4562
4563 hotcpu_notifier(dev_cpu_callback, 0);
4564 dst_init();
4565 dev_mcast_init();
4566 rc = 0;
4567 out:
4568 return rc;
4569 }
4570
/* Run net_dev_init() as a subsystem initcall. */
subsys_initcall(net_dev_init);

/* Symbols exported to modules. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks are exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);
4619
/* This page was automatically generated by the LXR engine. */