Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              PACKET - implements raw packet sockets.
  7  *
  8  * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
  9  *
 10  * Authors:     Ross Biro
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 13  *
 14  * Fixes:
 15  *              Alan Cox        :       verify_area() now used correctly
 16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 17  *              Alan Cox        :       tidied skbuff lists.
 18  *              Alan Cox        :       Now uses generic datagram routines I
 19  *                                      added. Also fixed the peek/read crash
 20  *                                      from all old Linux datagram code.
 21  *              Alan Cox        :       Uses the improved datagram code.
 22  *              Alan Cox        :       Added NULL's for socket options.
 23  *              Alan Cox        :       Re-commented the code.
 24  *              Alan Cox        :       Use new kernel side addressing
 25  *              Rob Janssen     :       Correct MTU usage.
 26  *              Dave Platt      :       Counter leaks caused by incorrect
 27  *                                      interrupt locking and some slightly
 28  *                                      dubious gcc output. Can you read
 29  *                                      compiler: it said _VOLATILE_
 30  *      Richard Kooijman        :       Timestamp fixes.
 31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
 32  *              Alan Cox        :       sendmsg/recvmsg support.
 33  *              Alan Cox        :       Protocol setting support
 34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
 36  *      Michal Ostrowski        :       Module initialization cleanup.
 37  *         Ulises Alonso        :       Frame number limit removal and
 38  *                                      packet_set_ring memory leak.
 39  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 40  *                                      The convention is that longer addresses
 41  *                                      will simply extend the hardware address
 42  *                                      byte arrays at the end of sockaddr_ll
 43  *                                      and packet_mreq.
 44  *
 45  *              This program is free software; you can redistribute it and/or
 46  *              modify it under the terms of the GNU General Public License
 47  *              as published by the Free Software Foundation; either version
 48  *              2 of the License, or (at your option) any later version.
 49  *
 50  */
 51 
 52 #include <linux/types.h>
 53 #include <linux/mm.h>
 54 #include <linux/capability.h>
 55 #include <linux/fcntl.h>
 56 #include <linux/socket.h>
 57 #include <linux/in.h>
 58 #include <linux/inet.h>
 59 #include <linux/netdevice.h>
 60 #include <linux/if_packet.h>
 61 #include <linux/wireless.h>
 62 #include <linux/kernel.h>
 63 #include <linux/kmod.h>
 64 #include <net/net_namespace.h>
 65 #include <net/ip.h>
 66 #include <net/protocol.h>
 67 #include <linux/skbuff.h>
 68 #include <net/sock.h>
 69 #include <linux/errno.h>
 70 #include <linux/timer.h>
 71 #include <asm/system.h>
 72 #include <asm/uaccess.h>
 73 #include <asm/ioctls.h>
 74 #include <asm/page.h>
 75 #include <asm/cacheflush.h>
 76 #include <asm/io.h>
 77 #include <linux/proc_fs.h>
 78 #include <linux/seq_file.h>
 79 #include <linux/poll.h>
 80 #include <linux/module.h>
 81 #include <linux/init.h>
 82 
 83 #ifdef CONFIG_INET
 84 #include <net/inet_common.h>
 85 #endif
 86 
 87 /*
 88    Assumptions:
 89    - if device has no dev->hard_header routine, it adds and removes ll header
 90      inside itself. In this case ll header is invisible outside of device,
 91      but higher levels still should reserve dev->hard_header_len.
 92      Some devices are clever enough to reallocate the skb when the header
 93      will not fit into the reserved space (tunnel); others are silly
 94      (PPP).
 95    - packet socket receives packets with pulled ll header,
 96      so that SOCK_RAW should push it back.
 97 
 98 On receive:
 99 -----------
100 
101 Incoming, dev->hard_header!=NULL
102    mac_header -> ll header
103    data       -> data
104 
105 Outgoing, dev->hard_header!=NULL
106    mac_header -> ll header
107    data       -> ll header
108 
109 Incoming, dev->hard_header==NULL
110    mac_header -> UNKNOWN position. It is very likely that it points to the ll
111                  header.  PPP does this, which is wrong because it introduces
112                  asymmetry between the rx and tx paths.
113    data       -> data
114 
115 Outgoing, dev->hard_header==NULL
116    mac_header -> data. ll header is still not built!
117    data       -> data
118 
119 Resume
120   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
121 
122 
123 On transmit:
124 ------------
125 
126 dev->hard_header != NULL
127    mac_header -> ll header
128    data       -> ll header
129 
130 dev->hard_header == NULL (ll header is added by device, we cannot control it)
131    mac_header -> data
132    data       -> data
133 
134    We should set nh.raw on output to the correct position;
135    the packet classifier depends on it.
136  */
137 
138 /* Private packet socket structures. */
139 
140 struct packet_mclist
141 {
142         struct packet_mclist    *next;
143         int                     ifindex;
144         int                     count;
145         unsigned short          type;
146         unsigned short          alen;
147         unsigned char           addr[MAX_ADDR_LEN];
148 };
149 /* identical to struct packet_mreq except it has
150  * a longer address field.
151  */
152 struct packet_mreq_max
153 {
154         int             mr_ifindex;
155         unsigned short  mr_type;
156         unsigned short  mr_alen;
157         unsigned char   mr_address[MAX_ADDR_LEN];
158 };
159 
/* Forward declaration; only declared when CONFIG_PACKET_MMAP is set.  Used by packet_release(). */
160 #ifdef CONFIG_PACKET_MMAP
161 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
162 #endif
163 
/* Forward declaration; called from packet_release() before the definition is seen. */
164 static void packet_flush_mclist(struct sock *sk);
165 
166 struct packet_sock {
167         /* struct sock has to be the first member of packet_sock */
168         struct sock             sk;
169         struct tpacket_stats    stats;
170 #ifdef CONFIG_PACKET_MMAP
171         char *                  *pg_vec;
172         unsigned int            head;
173         unsigned int            frames_per_block;
174         unsigned int            frame_size;
175         unsigned int            frame_max;
176         int                     copy_thresh;
177 #endif
178         struct packet_type      prot_hook;
179         spinlock_t              bind_lock;
180         unsigned int            running:1,      /* prot_hook is attached*/
181                                 auxdata:1,
182                                 origdev:1;
183         int                     ifindex;        /* bound device         */
184         __be16                  num;
185         struct packet_mclist    *mclist;
186 #ifdef CONFIG_PACKET_MMAP
187         atomic_t                mapped;
188         unsigned int            pg_vec_order;
189         unsigned int            pg_vec_pages;
190         unsigned int            pg_vec_len;
191 #endif
192 };
193 
194 struct packet_skb_cb {
195         unsigned int origlen;
196         union {
197                 struct sockaddr_pkt pkt;
198                 struct sockaddr_ll ll;
199         } sa;
200 };
201 
/* Reinterpret the control buffer (cb) of __skb as a struct packet_skb_cb. */
202 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
203 
204 #ifdef CONFIG_PACKET_MMAP
205 
206 static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
207 {
208         unsigned int pg_vec_pos, frame_offset;
209 
210         pg_vec_pos = position / po->frames_per_block;
211         frame_offset = position % po->frames_per_block;
212 
213         return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
214 }
215 #endif
216 
/*
 * Convert a struct sock pointer to its enclosing struct packet_sock.
 * This is a plain cast, so it relies on 'sk' being the first member of
 * struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        struct packet_sock *po = (struct packet_sock *)sk;

        return po;
}
221 
222 static void packet_sock_destruct(struct sock *sk)
223 {
224         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
225         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
226 
227         if (!sock_flag(sk, SOCK_DEAD)) {
228                 printk("Attempt to release alive packet socket: %p\n", sk);
229                 return;
230         }
231 
232         sk_refcnt_debug_dec(sk);
233 }
234 
235 
/* Forward declaration: selected by packet_create() for SOCK_DGRAM / SOCK_RAW sockets. */
236 static const struct proto_ops packet_ops;
237 
/* Forward declaration: selected by packet_create() for SOCK_PACKET sockets. */
238 static const struct proto_ops packet_ops_spkt;
239 
240 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
241 {
242         struct sock *sk;
243         struct sockaddr_pkt *spkt;
244 
245         /*
246          *      When we registered the protocol we saved the socket in the data
247          *      field for just this event.
248          */
249 
250         sk = pt->af_packet_priv;
251 
252         /*
253          *      Yank back the headers [hope the device set this
254          *      right or kerboom...]
255          *
256          *      Incoming packets have ll header pulled,
257          *      push it back.
258          *
259          *      For outgoing ones skb->data == skb_mac_header(skb)
260          *      so that this procedure is noop.
261          */
262 
263         if (skb->pkt_type == PACKET_LOOPBACK)
264                 goto out;
265 
266         if (dev->nd_net != sk->sk_net)
267                 goto out;
268 
269         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
270                 goto oom;
271 
272         /* drop any routing info */
273         dst_release(skb->dst);
274         skb->dst = NULL;
275 
276         /* drop conntrack reference */
277         nf_reset(skb);
278 
279         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
280 
281         skb_push(skb, skb->data - skb_mac_header(skb));
282 
283         /*
284          *      The SOCK_PACKET socket receives _all_ frames.
285          */
286 
287         spkt->spkt_family = dev->type;
288         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
289         spkt->spkt_protocol = skb->protocol;
290 
291         /*
292          *      Charge the memory to the socket. This is done specifically
293          *      to prevent sockets using all the memory up.
294          */
295 
296         if (sock_queue_rcv_skb(sk,skb) == 0)
297                 return 0;
298 
299 out:
300         kfree_skb(skb);
301 oom:
302         return 0;
303 }
304 
305 
306 /*
307  *      Output a raw packet to a device layer. This bypasses all the other
308  *      protocol layers and you must therefore supply it with a complete frame
309  */
310 
311 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
312                                struct msghdr *msg, size_t len)
313 {
314         struct sock *sk = sock->sk;
315         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
316         struct sk_buff *skb;
317         struct net_device *dev;
318         __be16 proto=0;
319         int err;
320 
321         /*
322          *      Get and verify the address.
323          */
324 
325         if (saddr)
326         {
327                 if (msg->msg_namelen < sizeof(struct sockaddr))
328                         return(-EINVAL);
329                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
330                         proto=saddr->spkt_protocol;
331         }
332         else
333                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
334 
335         /*
336          *      Find the device first to size check it
337          */
338 
339         saddr->spkt_device[13] = 0;
340         dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
341         err = -ENODEV;
342         if (dev == NULL)
343                 goto out_unlock;
344 
345         err = -ENETDOWN;
346         if (!(dev->flags & IFF_UP))
347                 goto out_unlock;
348 
349         /*
350          *      You may not queue a frame bigger than the mtu. This is the lowest level
351          *      raw protocol and you must do your own fragmentation at this level.
352          */
353 
354         err = -EMSGSIZE;
355         if (len > dev->mtu + dev->hard_header_len)
356                 goto out_unlock;
357 
358         err = -ENOBUFS;
359         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
360 
361         /*
362          *      If the write buffer is full, then tough. At this level the user gets to
363          *      deal with the problem - do your own algorithmic backoffs. That's far
364          *      more flexible.
365          */
366 
367         if (skb == NULL)
368                 goto out_unlock;
369 
370         /*
371          *      Fill it in
372          */
373 
374         /* FIXME: Save some space for broken drivers that write a
375          * hard header at transmission time by themselves. PPP is the
376          * notable one here. This should really be fixed at the driver level.
377          */
378         skb_reserve(skb, LL_RESERVED_SPACE(dev));
379         skb_reset_network_header(skb);
380 
381         /* Try to align data part correctly */
382         if (dev->header_ops) {
383                 skb->data -= dev->hard_header_len;
384                 skb->tail -= dev->hard_header_len;
385                 if (len < dev->hard_header_len)
386                         skb_reset_network_header(skb);
387         }
388 
389         /* Returns -EFAULT on error */
390         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
391         skb->protocol = proto;
392         skb->dev = dev;
393         skb->priority = sk->sk_priority;
394         if (err)
395                 goto out_free;
396 
397         /*
398          *      Now send it
399          */
400 
401         dev_queue_xmit(skb);
402         dev_put(dev);
403         return(len);
404 
405 out_free:
406         kfree_skb(skb);
407 out_unlock:
408         if (dev)
409                 dev_put(dev);
410         return err;
411 }
412 
413 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
414                                       unsigned int res)
415 {
416         struct sk_filter *filter;
417 
418         rcu_read_lock_bh();
419         filter = rcu_dereference(sk->sk_filter);
420         if (filter != NULL)
421                 res = sk_run_filter(skb, filter->insns, filter->len);
422         rcu_read_unlock_bh();
423 
424         return res;
425 }
426 
427 /*
428    This function clones the skb lazily, in the hope that most packets
429    are discarded by BPF.
430 
431    Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
432    and skb->cb are mangled. It works because (and for as long as) packets
433    arriving here are owned by the current CPU. Output packets are cloned
434    by dev_queue_xmit_nit(), input packets are processed by net_bh
435    sequentially, so if we return the skb to its original state on exit,
436    we will not harm anyone.
437  */
438 
/*
 * Receive hook for non-mmap SOCK_RAW / SOCK_DGRAM packet sockets.
 *
 * Positions skb->data according to the socket type, runs the socket filter,
 * and if the packet is accepted and fits in the receive buffer, queues it
 * (cloned first if the skb is shared) on sk->sk_receive_queue with a
 * struct sockaddr_ll stored in skb->cb.
 *
 * skb->data / skb->len may be changed on a shared skb; every exit path that
 * keeps the shared skb restores them from skb_head / skb_len first.
 *
 * Always returns 0.
 */
439 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
440 {
441         struct sock *sk;
442         struct sockaddr_ll *sll;
443         struct packet_sock *po;
            /* Saved so a shared skb can be put back the way it arrived. */
444         u8 * skb_head = skb->data;
445         int skb_len = skb->len;
446         unsigned int snaplen, res;
447 
448         if (skb->pkt_type == PACKET_LOOPBACK)
449                 goto drop;
450 
            /* The owning socket is carried in the hook's private field. */
451         sk = pt->af_packet_priv;
452         po = pkt_sk(sk);
453 
            /* Ignore devices that live in a different network namespace. */
454         if (dev->nd_net != sk->sk_net)
455                 goto drop;
456 
457         skb->dev = dev;
458 
459         if (dev->header_ops) {
460                 /* The device has an explicit notion of ll header,
461                    exported to higher levels.
462 
463                    Otherwise, the device hides details of its frame
464                    structure, so that the corresponding packet head
465                    is never delivered to the user.
466                  */
467                 if (sk->sk_type != SOCK_DGRAM)
468                         skb_push(skb, skb->data - skb_mac_header(skb));
469                 else if (skb->pkt_type == PACKET_OUTGOING) {
470                         /* Special case: outgoing packets have ll header at head */
471                         skb_pull(skb, skb_network_offset(skb));
472                 }
473         }
474 
475         snaplen = skb->len;
476 
            /* A zero filter result rejects the packet; a smaller one truncates it. */
477         res = run_filter(skb, sk, snaplen);
478         if (!res)
479                 goto drop_n_restore;
480         if (snaplen > res)
481                 snaplen = res;
482 
            /* Receive buffer limit reached: count the packet as dropped. */
483         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
484             (unsigned)sk->sk_rcvbuf)
485                 goto drop_n_acct;
486 
            /*
             * Only now, once the packet is known to be wanted, is a shared
             * skb cloned.  The original is restored and released, and the
             * private clone is used from here on.
             */
487         if (skb_shared(skb)) {
488                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
489                 if (nskb == NULL)
490                         goto drop_n_acct;
491 
492                 if (skb_head != skb->data) {
493                         skb->data = skb_head;
494                         skb->len = skb_len;
495                 }
496                 kfree_skb(skb);
497                 skb = nskb;
498         }
499 
            /* Compile-time check that packet_skb_cb, allowing for a MAX_ADDR_LEN address, fits in skb->cb. */
500         BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
501                      sizeof(skb->cb));
502 
            /* Build the source address that is stored alongside the packet in skb->cb. */
503         sll = &PACKET_SKB_CB(skb)->sa.ll;
504         sll->sll_family = AF_PACKET;
505         sll->sll_hatype = dev->type;
506         sll->sll_protocol = skb->protocol;
507         sll->sll_pkttype = skb->pkt_type;
508         if (unlikely(po->origdev))
509                 sll->sll_ifindex = orig_dev->ifindex;
510         else
511                 sll->sll_ifindex = dev->ifindex;
512 
513         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
514 
            /* Remember the untruncated length before trimming to snaplen. */
515         PACKET_SKB_CB(skb)->origlen = skb->len;
516 
517         if (pskb_trim(skb, snaplen))
518                 goto drop_n_acct;
519 
520         skb_set_owner_r(skb, sk);
521         skb->dev = NULL;
522         dst_release(skb->dst);
523         skb->dst = NULL;
524 
525         /* drop conntrack reference */
526         nf_reset(skb);
527 
            /* The queue lock also covers the statistics counter. */
528         spin_lock(&sk->sk_receive_queue.lock);
529         po->stats.tp_packets++;
530         __skb_queue_tail(&sk->sk_receive_queue, skb);
531         spin_unlock(&sk->sk_receive_queue.lock);
532         sk->sk_data_ready(sk, skb->len);
533         return 0;
534 
535 drop_n_acct:
536         spin_lock(&sk->sk_receive_queue.lock);
537         po->stats.tp_drops++;
538         spin_unlock(&sk->sk_receive_queue.lock);
539 
540 drop_n_restore:
            /* Undo the data/len adjustment if other users still hold this skb. */
541         if (skb_head != skb->data && skb_shared(skb)) {
542                 skb->data = skb_head;
543                 skb->len = skb_len;
544         }
545 drop:
546         kfree_skb(skb);
547         return 0;
548 }
549 
550 #ifdef CONFIG_PACKET_MMAP
/*
 * Receive hook used for the mmap'ed ring (CONFIG_PACKET_MMAP).
 *
 * Positions skb->data according to the socket type, runs the socket filter,
 * then copies up to one frame's worth of the packet, a struct tpacket_hdr
 * and a struct sockaddr_ll into the ring frame at po->head.  A frame is
 * free when its tp_status is zero; otherwise the packet is counted as
 * dropped.  When the packet does not fit in a frame and po->copy_thresh is
 * set, a reference to the whole skb is additionally queued on
 * sk->sk_receive_queue and the frame is flagged TP_STATUS_COPY.
 *
 * Always returns 0.
 */
551 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
552 {
553         struct sock *sk;
554         struct packet_sock *po;
555         struct sockaddr_ll *sll;
556         struct tpacket_hdr *h;
            /* Saved so a shared skb can be put back the way it arrived. */
557         u8 * skb_head = skb->data;
558         int skb_len = skb->len;
559         unsigned int snaplen, res;
            /* TP_STATUS_LOSING is cleared further down if no drops have been counted. */
560         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
561         unsigned short macoff, netoff;
562         struct sk_buff *copy_skb = NULL;
563         struct timeval tv;
564 
565         if (skb->pkt_type == PACKET_LOOPBACK)
566                 goto drop;
567 
            /* The owning socket is carried in the hook's private field. */
568         sk = pt->af_packet_priv;
569         po = pkt_sk(sk);
570 
            /* Ignore devices that live in a different network namespace. */
571         if (dev->nd_net != sk->sk_net)
572                 goto drop;
573 
574         if (dev->header_ops) {
575                 if (sk->sk_type != SOCK_DGRAM)
576                         skb_push(skb, skb->data - skb_mac_header(skb));
577                 else if (skb->pkt_type == PACKET_OUTGOING) {
578                         /* Special case: outgoing packets have ll header at head */
579                         skb_pull(skb, skb_network_offset(skb));
580                 }
581         }
582 
583         if (skb->ip_summed == CHECKSUM_PARTIAL)
584                 status |= TP_STATUS_CSUMNOTREADY;
585 
586         snaplen = skb->len;
587 
            /* A zero filter result rejects the packet; a smaller one truncates it. */
588         res = run_filter(skb, sk, snaplen);
589         if (!res)
590                 goto drop_n_restore;
591         if (snaplen > res)
592                 snaplen = res;
593 
            /*
             * Offsets, from the start of the frame, at which the link-level
             * and network headers will be placed.  At least 16 bytes are
             * left after the tpacket header in both cases.
             */
594         if (sk->sk_type == SOCK_DGRAM) {
595                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
596         } else {
597                 unsigned maclen = skb_network_offset(skb);
598                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
599                 macoff = netoff - maclen;
600         }
601 
            /*
             * The packet does not fit in one frame.  If copy_thresh is set
             * and the receive buffer has room, keep a reference (a clone when
             * the skb is shared) so the full packet can be queued on the
             * socket as well; then clamp snaplen to what the frame can hold.
             */
602         if (macoff + snaplen > po->frame_size) {
603                 if (po->copy_thresh &&
604                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
605                     (unsigned)sk->sk_rcvbuf) {
606                         if (skb_shared(skb)) {
607                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
608                         } else {
609                                 copy_skb = skb_get(skb);
                                    /* The skb is kept as it is now, so the restore at drop_n_restore must not fire. */
610                                 skb_head = skb->data;
611                         }
612                         if (copy_skb)
613                                 skb_set_owner_r(copy_skb, sk);
614                 }
615                 snaplen = po->frame_size - macoff;
616                 if ((int)snaplen < 0)
617                         snaplen = 0;
618         }
619 
            /* Claim the frame at po->head under the receive queue lock. */
620         spin_lock(&sk->sk_receive_queue.lock);
621         h = packet_lookup_frame(po, po->head);
622 
            /* A non-zero status means the frame is not free to be written. */
623         if (h->tp_status)
624                 goto ring_is_full;
            /* Advance head, wrapping to 0 after frame_max. */
625         po->head = po->head != po->frame_max ? po->head+1 : 0;
626         po->stats.tp_packets++;
627         if (copy_skb) {
628                 status |= TP_STATUS_COPY;
629                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
630         }
631         if (!po->stats.tp_drops)
632                 status &= ~TP_STATUS_LOSING;
633         spin_unlock(&sk->sk_receive_queue.lock);
634 
            /* Fill the claimed frame: payload first, then the header fields. */
635         skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
636 
637         h->tp_len = skb->len;
638         h->tp_snaplen = snaplen;
639         h->tp_mac = macoff;
640         h->tp_net = netoff;
            /* Use the skb's timestamp when it has one, otherwise the current time. */
641         if (skb->tstamp.tv64)
642                 tv = ktime_to_timeval(skb->tstamp);
643         else
644                 do_gettimeofday(&tv);
645         h->tp_sec = tv.tv_sec;
646         h->tp_usec = tv.tv_usec;
647 
            /* The source address sits right after the aligned tpacket header. */
648         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
649         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
650         sll->sll_family = AF_PACKET;
651         sll->sll_hatype = dev->type;
652         sll->sll_protocol = skb->protocol;
653         sll->sll_pkttype = skb->pkt_type;
654         if (unlikely(po->origdev))
655                 sll->sll_ifindex = orig_dev->ifindex;
656         else
657                 sll->sll_ifindex = dev->ifindex;
658 
            /* The status word is written after every other field of the frame. */
659         h->tp_status = status;
660         smp_mb();
661 
            /* Flush the data cache for every page the frame touches. */
662         {
663                 struct page *p_start, *p_end;
664                 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
665 
666                 p_start = virt_to_page(h);
667                 p_end = virt_to_page(h_end);
668                 while (p_start <= p_end) {
669                         flush_dcache_page(p_start);
670                         p_start++;
671                 }
672         }
673 
674         sk->sk_data_ready(sk, 0);
675 
676 drop_n_restore:
            /* Undo the data/len adjustment if other users still hold this skb. */
677         if (skb_head != skb->data && skb_shared(skb)) {
678                 skb->data = skb_head;
679                 skb->len = skb_len;
680         }
681 drop:
682         kfree_skb(skb);
683         return 0;
684 
685 ring_is_full:
686         po->stats.tp_drops++;
687         spin_unlock(&sk->sk_receive_queue.lock);
688 
            /* The reader is still signalled on a full ring; the extra reference taken above is released. */
689         sk->sk_data_ready(sk, 0);
690         if (copy_skb)
691                 kfree_skb(copy_skb);
692         goto drop_n_restore;
693 }
694 
695 #endif
696 
697 
698 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
699                           struct msghdr *msg, size_t len)
700 {
701         struct sock *sk = sock->sk;
702         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
703         struct sk_buff *skb;
704         struct net_device *dev;
705         __be16 proto;
706         unsigned char *addr;
707         int ifindex, err, reserve = 0;
708 
709         /*
710          *      Get and verify the address.
711          */
712 
713         if (saddr == NULL) {
714                 struct packet_sock *po = pkt_sk(sk);
715 
716                 ifindex = po->ifindex;
717                 proto   = po->num;
718                 addr    = NULL;
719         } else {
720                 err = -EINVAL;
721                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
722                         goto out;
723                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
724                         goto out;
725                 ifindex = saddr->sll_ifindex;
726                 proto   = saddr->sll_protocol;
727                 addr    = saddr->sll_addr;
728         }
729 
730 
731         dev = dev_get_by_index(sk->sk_net, ifindex);
732         err = -ENXIO;
733         if (dev == NULL)
734                 goto out_unlock;
735         if (sock->type == SOCK_RAW)
736                 reserve = dev->hard_header_len;
737 
738         err = -ENETDOWN;
739         if (!(dev->flags & IFF_UP))
740                 goto out_unlock;
741 
742         err = -EMSGSIZE;
743         if (len > dev->mtu+reserve)
744                 goto out_unlock;
745 
746         skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
747                                 msg->msg_flags & MSG_DONTWAIT, &err);
748         if (skb==NULL)
749                 goto out_unlock;
750 
751         skb_reserve(skb, LL_RESERVED_SPACE(dev));
752         skb_reset_network_header(skb);
753 
754         err = -EINVAL;
755         if (sock->type == SOCK_DGRAM &&
756             dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
757                 goto out_free;
758 
759         /* Returns -EFAULT on error */
760         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
761         if (err)
762                 goto out_free;
763 
764         skb->protocol = proto;
765         skb->dev = dev;
766         skb->priority = sk->sk_priority;
767 
768         /*
769          *      Now send it
770          */
771 
772         err = dev_queue_xmit(skb);
773         if (err > 0 && (err = net_xmit_errno(err)) != 0)
774                 goto out_unlock;
775 
776         dev_put(dev);
777 
778         return(len);
779 
780 out_free:
781         kfree_skb(skb);
782 out_unlock:
783         if (dev)
784                 dev_put(dev);
785 out:
786         return err;
787 }
788 
789 /*
790  *      Close a PACKET socket. This is fairly simple. We immediately go
791  *      to 'closed' state and remove our protocol entry in the device list.
792  */
793 
794 static int packet_release(struct socket *sock)
795 {
796         struct sock *sk = sock->sk;
797         struct packet_sock *po;
798         struct net *net;
799 
800         if (!sk)
801                 return 0;
802 
803         net = sk->sk_net;
804         po = pkt_sk(sk);
805 
806         write_lock_bh(&net->packet.sklist_lock);
807         sk_del_node_init(sk);
808         write_unlock_bh(&net->packet.sklist_lock);
809 
810         /*
811          *      Unhook packet receive handler.
812          */
813 
814         if (po->running) {
815                 /*
816                  *      Remove the protocol hook
817                  */
818                 dev_remove_pack(&po->prot_hook);
819                 po->running = 0;
820                 po->num = 0;
821                 __sock_put(sk);
822         }
823 
824         packet_flush_mclist(sk);
825 
826 #ifdef CONFIG_PACKET_MMAP
827         if (po->pg_vec) {
828                 struct tpacket_req req;
829                 memset(&req, 0, sizeof(req));
830                 packet_set_ring(sk, &req, 1);
831         }
832 #endif
833 
834         /*
835          *      Now the socket is dead. No more input will appear.
836          */
837 
838         sock_orphan(sk);
839         sock->sk = NULL;
840 
841         /* Purge queues */
842 
843         skb_queue_purge(&sk->sk_receive_queue);
844         sk_refcnt_debug_release(sk);
845 
846         sock_put(sk);
847         return 0;
848 }
849 
850 /*
851  *      Attach a packet hook.
852  */
853 
/*
 * (Re)attach the socket's receive hook for @protocol on @dev.
 *
 * @dev may be NULL, in which case the hook is not tied to a device and
 * po->ifindex is set to 0.  An already attached hook is removed first.
 * With @protocol == 0 the socket is left without a hook.  If @dev is given
 * but not IFF_UP, no hook is added and ENETDOWN is set as the socket error.
 *
 * Runs under lock_sock() and po->bind_lock.  Always returns 0.
 */
854 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
855 {
856         struct packet_sock *po = pkt_sk(sk);
857         /*
858          *      Detach an existing hook if present.
859          */
860 
861         lock_sock(sk);
862 
863         spin_lock(&po->bind_lock);
864         if (po->running) {
                    /* Give back the reference taken when the hook was attached. */
865                 __sock_put(sk);
866                 po->running = 0;
867                 po->num = 0;
                    /*
                     * bind_lock is released around dev_remove_pack() -
                     * presumably because that call may sleep; confirm
                     * against its definition.
                     */
868                 spin_unlock(&po->bind_lock);
869                 dev_remove_pack(&po->prot_hook);
870                 spin_lock(&po->bind_lock);
871         }
872 
            /* Record the new binding in the socket and in the hook itself. */
873         po->num = protocol;
874         po->prot_hook.type = protocol;
875         po->prot_hook.dev = dev;
876 
877         po->ifindex = dev ? dev->ifindex : 0;
878 
            /* Protocol 0: nothing to attach. */
879         if (protocol == 0)
880                 goto out_unlock;
881 
882         if (!dev || (dev->flags & IFF_UP)) {
883                 dev_add_pack(&po->prot_hook);
                    /* Hold the socket for as long as the hook is attached. */
884                 sock_hold(sk);
885                 po->running = 1;
886         } else {
                    /* Device is down: report it via the socket error, unless the socket is dead. */
887                 sk->sk_err = ENETDOWN;
888                 if (!sock_flag(sk, SOCK_DEAD))
889                         sk->sk_error_report(sk);
890         }
891 
892 out_unlock:
893         spin_unlock(&po->bind_lock);
894         release_sock(sk);
895         return 0;
896 }
897 
898 /*
899  *      Bind a packet socket to a device
900  */
901 
902 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
903 {
904         struct sock *sk=sock->sk;
905         char name[15];
906         struct net_device *dev;
907         int err = -ENODEV;
908 
909         /*
910          *      Check legality
911          */
912 
913         if (addr_len != sizeof(struct sockaddr))
914                 return -EINVAL;
915         strlcpy(name,uaddr->sa_data,sizeof(name));
916 
917         dev = dev_get_by_name(sk->sk_net, name);
918         if (dev) {
919                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
920                 dev_put(dev);
921         }
922         return err;
923 }
924 
925 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
926 {
927         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
928         struct sock *sk=sock->sk;
929         struct net_device *dev = NULL;
930         int err;
931 
932 
933         /*
934          *      Check legality
935          */
936 
937         if (addr_len < sizeof(struct sockaddr_ll))
938                 return -EINVAL;
939         if (sll->sll_family != AF_PACKET)
940                 return -EINVAL;
941 
942         if (sll->sll_ifindex) {
943                 err = -ENODEV;
944                 dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
945                 if (dev == NULL)
946                         goto out;
947         }
948         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
949         if (dev)
950                 dev_put(dev);
951 
952 out:
953         return err;
954 }
955 
956 static struct proto packet_proto = {
957         .name     = "PACKET",
958         .owner    = THIS_MODULE,
959         .obj_size = sizeof(struct packet_sock),
960 };
961 
962 /*
963  *      Create a packet of type SOCK_PACKET.
964  */
965 
966 static int packet_create(struct net *net, struct socket *sock, int protocol)
967 {
968         struct sock *sk;
969         struct packet_sock *po;
970         __be16 proto = (__force __be16)protocol; /* weird, but documented */
971         int err;
972 
973         if (!capable(CAP_NET_RAW))
974                 return -EPERM;
975         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
976             sock->type != SOCK_PACKET)
977                 return -ESOCKTNOSUPPORT;
978 
979         sock->state = SS_UNCONNECTED;
980 
981         err = -ENOBUFS;
982         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
983         if (sk == NULL)
984                 goto out;
985 
986         sock->ops = &packet_ops;
987         if (sock->type == SOCK_PACKET)
988                 sock->ops = &packet_ops_spkt;
989 
990         sock_init_data(sock, sk);
991 
992         po = pkt_sk(sk);
993         sk->sk_family = PF_PACKET;
994         po->num = proto;
995 
996         sk->sk_destruct = packet_sock_destruct;
997         sk_refcnt_debug_inc(sk);
998 
999         /*
1000          *      Attach a protocol block
1001          */
1002 
1003         spin_lock_init(&po->bind_lock);
1004         po->prot_hook.func = packet_rcv;
1005 
1006         if (sock->type == SOCK_PACKET)
1007                 po->prot_hook.func = packet_rcv_spkt;
1008 
1009         po->prot_hook.af_packet_priv = sk;
1010 
1011         if (proto) {
1012                 po->prot_hook.type = proto;
1013                 dev_add_pack(&po->prot_hook);
1014                 sock_hold(sk);
1015                 po->running = 1;
1016         }
1017 
1018         write_lock_bh(&net->packet.sklist_lock);
1019         sk_add_node(sk, &net->packet.sklist);
1020         write_unlock_bh(&net->packet.sklist_lock);
1021         return(0);
1022 out:
1023         return err;
1024 }
1025 
1026 /*
1027  *      Pull a packet from our receive queue and hand it to the user.
1028  *      If necessary we block.
1029  */
1030 
1031 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1032                           struct msghdr *msg, size_t len, int flags)
1033 {
1034         struct sock *sk = sock->sk;
1035         struct sk_buff *skb;
1036         int copied, err;
1037         struct sockaddr_ll *sll;
1038 
1039         err = -EINVAL;
1040         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1041                 goto out;
1042 
1043 #if 0
1044         /* What error should we return now? EUNATTACH? */
1045         if (pkt_sk(sk)->ifindex < 0)
1046                 return -ENODEV;
1047 #endif
1048 
1049         /*
1050          *      Call the generic datagram receiver. This handles all sorts
1051          *      of horrible races and re-entrancy so we can forget about it
1052          *      in the protocol layers.
1053          *
1054          *      Now it will return ENETDOWN, if device have just gone down,
1055          *      but then it will block.
1056          */
1057 
1058         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1059 
1060         /*
1061          *      An error occurred so return it. Because skb_recv_datagram()
1062          *      handles the blocking we don't see and worry about blocking
1063          *      retries.
1064          */
1065 
1066         if (skb == NULL)
1067                 goto out;
1068 
1069         /*
1070          *      If the address length field is there to be filled in, we fill
1071          *      it in now.
1072          */
1073 
1074         sll = &PACKET_SKB_CB(skb)->sa.ll;
1075         if (sock->type == SOCK_PACKET)
1076                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1077         else
1078                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1079 
1080         /*
1081          *      You lose any data beyond the buffer you gave. If it worries a
1082          *      user program they can ask the device for its MTU anyway.
1083          */
1084 
1085         copied = skb->len;
1086         if (copied > len)
1087         {
1088                 copied=len;
1089                 msg->msg_flags|=MSG_TRUNC;
1090         }
1091 
1092         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1093         if (err)
1094                 goto out_free;
1095 
1096         sock_recv_timestamp(msg, sk, skb);
1097 
1098         if (msg->msg_name)
1099                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1100                        msg->msg_namelen);
1101 
1102         if (pkt_sk(sk)->auxdata) {
1103                 struct tpacket_auxdata aux;
1104 
1105                 aux.tp_status = TP_STATUS_USER;
1106                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1107                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1108                 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1109                 aux.tp_snaplen = skb->len;
1110                 aux.tp_mac = 0;
1111                 aux.tp_net = skb_network_offset(skb);
1112 
1113                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1114         }
1115 
1116         /*
1117          *      Free or return the buffer as appropriate. Again this
1118          *      hides all the races and re-entrancy issues from us.
1119          */
1120         err = (flags&MSG_TRUNC) ? skb->len : copied;
1121 
1122 out_free:
1123         skb_free_datagram(sk, skb);
1124 out:
1125         return err;
1126 }
1127 
1128 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1129                                int *uaddr_len, int peer)
1130 {
1131         struct net_device *dev;
1132         struct sock *sk = sock->sk;
1133 
1134         if (peer)
1135                 return -EOPNOTSUPP;
1136 
1137         uaddr->sa_family = AF_PACKET;
1138         dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
1139         if (dev) {
1140                 strlcpy(uaddr->sa_data, dev->name, 15);
1141                 dev_put(dev);
1142         } else
1143                 memset(uaddr->sa_data, 0, 14);
1144         *uaddr_len = sizeof(*uaddr);
1145 
1146         return 0;
1147 }
1148 
1149 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1150                           int *uaddr_len, int peer)
1151 {
1152         struct net_device *dev;
1153         struct sock *sk = sock->sk;
1154         struct packet_sock *po = pkt_sk(sk);
1155         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1156 
1157         if (peer)
1158                 return -EOPNOTSUPP;
1159 
1160         sll->sll_family = AF_PACKET;
1161         sll->sll_ifindex = po->ifindex;
1162         sll->sll_protocol = po->num;
1163         dev = dev_get_by_index(sk->sk_net, po->ifindex);
1164         if (dev) {
1165                 sll->sll_hatype = dev->type;
1166                 sll->sll_halen = dev->addr_len;
1167                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1168                 dev_put(dev);
1169         } else {
1170                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1171                 sll->sll_halen = 0;
1172         }
1173         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1174 
1175         return 0;
1176 }
1177 
1178 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1179 {
1180         switch (i->type) {
1181         case PACKET_MR_MULTICAST:
1182                 if (what > 0)
1183                         dev_mc_add(dev, i->addr, i->alen, 0);
1184                 else
1185                         dev_mc_delete(dev, i->addr, i->alen, 0);
1186                 break;
1187         case PACKET_MR_PROMISC:
1188                 dev_set_promiscuity(dev, what);
1189                 break;
1190         case PACKET_MR_ALLMULTI:
1191                 dev_set_allmulti(dev, what);
1192                 break;
1193         default:;
1194         }
1195 }
1196 
1197 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1198 {
1199         for ( ; i; i=i->next) {
1200                 if (i->ifindex == dev->ifindex)
1201                         packet_dev_mc(dev, i, what);
1202         }
1203 }
1204 
1205 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1206 {
1207         struct packet_sock *po = pkt_sk(sk);
1208         struct packet_mclist *ml, *i;
1209         struct net_device *dev;
1210         int err;
1211 
1212         rtnl_lock();
1213 
1214         err = -ENODEV;
1215         dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
1216         if (!dev)
1217                 goto done;
1218 
1219         err = -EINVAL;
1220         if (mreq->mr_alen > dev->addr_len)
1221                 goto done;
1222 
1223         err = -ENOBUFS;
1224         i = kmalloc(sizeof(*i), GFP_KERNEL);
1225         if (i == NULL)
1226                 goto done;
1227 
1228         err = 0;
1229         for (ml = po->mclist; ml; ml = ml->next) {
1230                 if (ml->ifindex == mreq->mr_ifindex &&
1231                     ml->type == mreq->mr_type &&
1232                     ml->alen == mreq->mr_alen &&
1233                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1234                         ml->count++;
1235                         /* Free the new element ... */
1236                         kfree(i);
1237                         goto done;
1238                 }
1239         }
1240 
1241         i->type = mreq->mr_type;
1242         i->ifindex = mreq->mr_ifindex;
1243         i->alen = mreq->mr_alen;
1244         memcpy(i->addr, mreq->mr_address, i->alen);
1245         i->count = 1;
1246         i->next = po->mclist;
1247         po->mclist = i;
1248         packet_dev_mc(dev, i, +1);
1249 
1250 done:
1251         rtnl_unlock();
1252         return err;
1253 }
1254 
1255 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1256 {
1257         struct packet_mclist *ml, **mlp;
1258 
1259         rtnl_lock();
1260 
1261         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1262                 if (ml->ifindex == mreq->mr_ifindex &&
1263                     ml->type == mreq->mr_type &&
1264                     ml->alen == mreq->mr_alen &&
1265                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1266                         if (--ml->count == 0) {
1267                                 struct net_device *dev;
1268                                 *mlp = ml->next;
1269                                 dev = dev_get_by_index(sk->sk_net, ml->ifindex);
1270                                 if (dev) {
1271                                         packet_dev_mc(dev, ml, -1);
1272                                         dev_put(dev);
1273                                 }
1274                                 kfree(ml);
1275                         }
1276                         rtnl_unlock();
1277                         return 0;
1278                 }
1279         }
1280         rtnl_unlock();
1281         return -EADDRNOTAVAIL;
1282 }
1283 
1284 static void packet_flush_mclist(struct sock *sk)
1285 {
1286         struct packet_sock *po = pkt_sk(sk);
1287         struct packet_mclist *ml;
1288 
1289         if (!po->mclist)
1290                 return;
1291 
1292         rtnl_lock();
1293         while ((ml = po->mclist) != NULL) {
1294                 struct net_device *dev;
1295 
1296                 po->mclist = ml->next;
1297                 if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
1298                         packet_dev_mc(dev, ml, -1);
1299                         dev_put(dev);
1300                 }
1301                 kfree(ml);
1302         }
1303         rtnl_unlock();
1304 }
1305 
1306 static int
1307 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1308 {
1309         struct sock *sk = sock->sk;
1310         struct packet_sock *po = pkt_sk(sk);
1311         int ret;
1312 
1313         if (level != SOL_PACKET)
1314                 return -ENOPROTOOPT;
1315 
1316         switch(optname) {
1317         case PACKET_ADD_MEMBERSHIP:
1318         case PACKET_DROP_MEMBERSHIP:
1319         {
1320                 struct packet_mreq_max mreq;
1321                 int len = optlen;
1322                 memset(&mreq, 0, sizeof(mreq));
1323                 if (len < sizeof(struct packet_mreq))
1324                         return -EINVAL;
1325                 if (len > sizeof(mreq))
1326                         len = sizeof(mreq);
1327                 if (copy_from_user(&mreq,optval,len))
1328                         return -EFAULT;
1329                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1330                         return -EINVAL;
1331                 if (optname == PACKET_ADD_MEMBERSHIP)
1332                         ret = packet_mc_add(sk, &mreq);
1333                 else
1334                         ret = packet_mc_drop(sk, &mreq);
1335                 return ret;
1336         }
1337 
1338 #ifdef CONFIG_PACKET_MMAP
1339         case PACKET_RX_RING:
1340         {
1341                 struct tpacket_req req;
1342 
1343                 if (optlen<sizeof(req))
1344                         return -EINVAL;
1345                 if (copy_from_user(&req,optval,sizeof(req)))
1346                         return -EFAULT;
1347                 return packet_set_ring(sk, &req, 0);
1348         }
1349         case PACKET_COPY_THRESH:
1350         {
1351                 int val;
1352 
1353                 if (optlen!=sizeof(val))
1354                         return -EINVAL;
1355                 if (copy_from_user(&val,optval,sizeof(val)))
1356                         return -EFAULT;
1357 
1358                 pkt_sk(sk)->copy_thresh = val;
1359                 return 0;
1360         }
1361 #endif
1362         case PACKET_AUXDATA:
1363         {
1364                 int val;
1365 
1366                 if (optlen < sizeof(val))
1367                         return -EINVAL;
1368                 if (copy_from_user(&val, optval, sizeof(val)))
1369                         return -EFAULT;
1370 
1371                 po->auxdata = !!val;
1372                 return 0;
1373         }
1374         case PACKET_ORIGDEV:
1375         {
1376                 int val;
1377 
1378                 if (optlen < sizeof(val))
1379                         return -EINVAL;
1380                 if (copy_from_user(&val, optval, sizeof(val)))
1381                         return -EFAULT;
1382 
1383                 po->origdev = !!val;
1384                 return 0;
1385         }
1386         default:
1387                 return -ENOPROTOOPT;
1388         }
1389 }
1390 
1391 static int packet_getsockopt(struct socket *sock, int level, int optname,
1392                              char __user *optval, int __user *optlen)
1393 {
1394         int len;
1395         int val;
1396         struct sock *sk = sock->sk;
1397         struct packet_sock *po = pkt_sk(sk);
1398         void *data;
1399         struct tpacket_stats st;
1400 
1401         if (level != SOL_PACKET)
1402                 return -ENOPROTOOPT;
1403 
1404         if (get_user(len, optlen))
1405                 return -EFAULT;
1406 
1407         if (len < 0)
1408                 return -EINVAL;
1409 
1410         switch(optname) {
1411         case PACKET_STATISTICS:
1412                 if (len > sizeof(struct tpacket_stats))
1413                         len = sizeof(struct tpacket_stats);
1414                 spin_lock_bh(&sk->sk_receive_queue.lock);
1415                 st = po->stats;
1416                 memset(&po->stats, 0, sizeof(st));
1417                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1418                 st.tp_packets += st.tp_drops;
1419 
1420                 data = &st;
1421                 break;
1422         case PACKET_AUXDATA:
1423                 if (len > sizeof(int))
1424                         len = sizeof(int);
1425                 val = po->auxdata;
1426 
1427                 data = &val;
1428                 break;
1429         case PACKET_ORIGDEV:
1430                 if (len > sizeof(int))
1431                         len = sizeof(int);
1432                 val = po->origdev;
1433 
1434                 data = &val;
1435                 break;
1436         default:
1437                 return -ENOPROTOOPT;
1438         }
1439 
1440         if (put_user(len, optlen))
1441                 return -EFAULT;
1442         if (copy_to_user(optval, data, len))
1443                 return -EFAULT;
1444         return 0;
1445 }
1446 
1447 
1448 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1449 {
1450         struct sock *sk;
1451         struct hlist_node *node;
1452         struct net_device *dev = data;
1453         struct net *net = dev->nd_net;
1454 
1455         read_lock(&net->packet.sklist_lock);
1456         sk_for_each(sk, node, &net->packet.sklist) {
1457                 struct packet_sock *po = pkt_sk(sk);
1458 
1459                 switch (msg) {
1460                 case NETDEV_UNREGISTER:
1461                         if (po->mclist)
1462                                 packet_dev_mclist(dev, po->mclist, -1);
1463                         /* fallthrough */
1464 
1465                 case NETDEV_DOWN:
1466                         if (dev->ifindex == po->ifindex) {
1467                                 spin_lock(&po->bind_lock);
1468                                 if (po->running) {
1469                                         __dev_remove_pack(&po->prot_hook);
1470                                         __sock_put(sk);
1471                                         po->running = 0;
1472                                         sk->sk_err = ENETDOWN;
1473                                         if (!sock_flag(sk, SOCK_DEAD))
1474                                                 sk->sk_error_report(sk);
1475                                 }
1476                                 if (msg == NETDEV_UNREGISTER) {
1477                                         po->ifindex = -1;
1478                                         po->prot_hook.dev = NULL;
1479                                 }
1480                                 spin_unlock(&po->bind_lock);
1481                         }
1482                         break;
1483                 case NETDEV_UP:
1484                         spin_lock(&po->bind_lock);
1485                         if (dev->ifindex == po->ifindex && po->num &&
1486                             !po->running) {
1487                                 dev_add_pack(&po->prot_hook);
1488                                 sock_hold(sk);
1489                                 po->running = 1;
1490                         }
1491                         spin_unlock(&po->bind_lock);
1492                         break;
1493                 }
1494         }
1495         read_unlock(&net->packet.sklist_lock);
1496         return NOTIFY_DONE;
1497 }
1498 
1499 
1500 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1501                         unsigned long arg)
1502 {
1503         struct sock *sk = sock->sk;
1504 
1505         switch(cmd) {
1506                 case SIOCOUTQ:
1507                 {
1508                         int amount = atomic_read(&sk->sk_wmem_alloc);
1509                         return put_user(amount, (int __user *)arg);
1510                 }
1511                 case SIOCINQ:
1512                 {
1513                         struct sk_buff *skb;
1514                         int amount = 0;
1515 
1516                         spin_lock_bh(&sk->sk_receive_queue.lock);
1517                         skb = skb_peek(&sk->sk_receive_queue);
1518                         if (skb)
1519                                 amount = skb->len;
1520                         spin_unlock_bh(&sk->sk_receive_queue.lock);
1521                         return put_user(amount, (int __user *)arg);
1522                 }
1523                 case SIOCGSTAMP:
1524                         return sock_get_timestamp(sk, (struct timeval __user *)arg);
1525                 case SIOCGSTAMPNS:
1526                         return sock_get_timestampns(sk, (struct timespec __user *)arg);
1527 
1528 #ifdef CONFIG_INET
1529                 case SIOCADDRT:
1530                 case SIOCDELRT:
1531                 case SIOCDARP:
1532                 case SIOCGARP:
1533                 case SIOCSARP:
1534                 case SIOCGIFADDR:
1535                 case SIOCSIFADDR:
1536                 case SIOCGIFBRDADDR:
1537                 case SIOCSIFBRDADDR:
1538                 case SIOCGIFNETMASK:
1539                 case SIOCSIFNETMASK:
1540                 case SIOCGIFDSTADDR:
1541                 case SIOCSIFDSTADDR:
1542                 case SIOCSIFFLAGS:
1543                         if (sk->sk_net != &init_net)
1544                                 return -ENOIOCTLCMD;
1545                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1546 #endif
1547 
1548                 default:
1549                         return -ENOIOCTLCMD;
1550         }
1551         return 0;
1552 }
1553 
1554 #ifndef CONFIG_PACKET_MMAP
1555 #define packet_mmap sock_no_mmap
1556 #define packet_poll datagram_poll
1557 #else
1558 
1559 static unsigned int packet_poll(struct file * file, struct socket *sock,
1560                                 poll_table *wait)
1561 {
1562         struct sock *sk = sock->sk;
1563         struct packet_sock *po = pkt_sk(sk);
1564         unsigned int mask = datagram_poll(file, sock, wait);
1565 
1566         spin_lock_bh(&sk->sk_receive_queue.lock);
1567         if (po->pg_vec) {
1568                 unsigned last = po->head ? po->head-1 : po->frame_max;
1569                 struct tpacket_hdr *h;
1570 
1571                 h = packet_lookup_frame(po, last);
1572 
1573                 if (h->tp_status)
1574                         mask |= POLLIN | POLLRDNORM;
1575         }
1576         spin_unlock_bh(&sk->sk_receive_queue.lock);
1577         return mask;
1578 }
1579 
1580 
1581 /* Dirty? Well, I still did not learn better way to account
1582  * for user mmaps.
1583  */
1584 
1585 static void packet_mm_open(struct vm_area_struct *vma)
1586 {
1587         struct file *file = vma->vm_file;
1588         struct socket * sock = file->private_data;
1589         struct sock *sk = sock->sk;
1590 
1591         if (sk)
1592                 atomic_inc(&pkt_sk(sk)->mapped);
1593 }
1594 
1595 static void packet_mm_close(struct vm_area_struct *vma)
1596 {
1597         struct file *file = vma->vm_file;
1598         struct socket * sock = file->private_data;
1599         struct sock *sk = sock->sk;
1600 
1601         if (sk)
1602                 atomic_dec(&pkt_sk(sk)->mapped);
1603 }
1604 
1605 static struct vm_operations_struct packet_mmap_ops = {
1606         .open = packet_mm_open,
1607         .close =packet_mm_close,
1608 };
1609 
/*
 * Free a block vector: every non-NULL entry is released with
 * free_pages() at the given order, then the vector itself is freed.
 * NULL entries are skipped, so a partially filled vector is fine.
 *
 * The index is unsigned to match len; a signed index compared against
 * an unsigned length is a sign-mismatched comparison and would overflow
 * for lengths above INT_MAX.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
        unsigned int i;

        for (i = 0; i < len; i++) {
                if (likely(pg_vec[i]))
                        free_pages((unsigned long) pg_vec[i], order);
        }
        kfree(pg_vec);
}
1620 
1621 static inline char *alloc_one_pg_vec_page(unsigned long order)
1622 {
1623         return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1624                                          order);
1625 }
1626 
1627 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1628 {
1629         unsigned int block_nr = req->tp_block_nr;
1630         char **pg_vec;
1631         int i;
1632 
1633         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1634         if (unlikely(!pg_vec))
1635                 goto out;
1636 
1637         for (i = 0; i < block_nr; i++) {
1638                 pg_vec[i] = alloc_one_pg_vec_page(order);
1639                 if (unlikely(!pg_vec[i]))
1640                         goto out_free_pgvec;
1641         }
1642 
1643 out:
1644         return pg_vec;
1645 
1646 out_free_pgvec:
1647         free_pg_vec(pg_vec, order, block_nr);
1648         pg_vec = NULL;
1649         goto out;
1650 }
1651 
1652 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1653 {
1654         char **pg_vec = NULL;
1655         struct packet_sock *po = pkt_sk(sk);
1656         int was_running, order = 0;
1657         __be16 num;
1658         int err = 0;
1659 
1660         if (req->tp_block_nr) {
1661                 int i, l;
1662 
1663                 /* Sanity tests and some calculations */
1664 
1665                 if (unlikely(po->pg_vec))
1666                         return -EBUSY;
1667 
1668                 if (unlikely((int)req->tp_block_size <= 0))
1669                         return -EINVAL;
1670                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1671                         return -EINVAL;
1672                 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1673                         return -EINVAL;
1674                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1675                         return -EINVAL;
1676 
1677                 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1678                 if (unlikely(po->frames_per_block <= 0))
1679                         return -EINVAL;
1680                 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1681                              req->tp_frame_nr))
1682                         return -EINVAL;
1683 
1684                 err = -ENOMEM;
1685                 order = get_order(req->tp_block_size);
1686                 pg_vec = alloc_pg_vec(req, order);
1687                 if (unlikely(!pg_vec))
1688                         goto out;
1689 
1690                 l = 0;
1691                 for (i = 0; i < req->tp_block_nr; i++) {
1692                         char *ptr = pg_vec[i];
1693                         struct tpacket_hdr *header;
1694                         int k;
1695 
1696                         for (k = 0; k < po->frames_per_block; k++) {
1697                                 header = (struct tpacket_hdr *) ptr;
1698                                 header->tp_status = TP_STATUS_KERNEL;
1699                                 ptr += req->tp_frame_size;
1700                         }
1701                 }
1702                 /* Done */
1703         } else {
1704                 if (unlikely(req->tp_frame_nr))
1705                         return -EINVAL;
1706         }
1707 
1708         lock_sock(sk);
1709 
1710         /* Detach socket from network */
1711         spin_lock(&po->bind_lock);
1712         was_running = po->running;
1713         num = po->num;
1714         if (was_running) {
1715                 __dev_remove_pack(&po->prot_hook);
1716                 po->num = 0;
1717                 po->running = 0;
1718                 __sock_put(sk);
1719         }
1720         spin_unlock(&po->bind_lock);
1721 
1722         synchronize_net();
1723 
1724         err = -EBUSY;
1725         if (closing || atomic_read(&po->mapped) == 0) {
1726                 err = 0;
1727 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1728 
1729                 spin_lock_bh(&sk->sk_receive_queue.lock);
1730                 pg_vec = XC(po->pg_vec, pg_vec);
1731                 po->frame_max = (req->tp_frame_nr - 1);
1732                 po->head = 0;
1733                 po->frame_size = req->tp_frame_size;
1734                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1735 
1736                 order = XC(po->pg_vec_order, order);
1737                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1738 
1739                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1740                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1741                 skb_queue_purge(&sk->sk_receive_queue);
1742 #undef XC
1743                 if (atomic_read(&po->mapped))
1744                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1745         }
1746 
1747         spin_lock(&po->bind_lock);
1748         if (was_running && !po->running) {
1749                 sock_hold(sk);
1750                 po->running = 1;
1751                 po->num = num;
1752                 dev_add_pack(&po->prot_hook);
1753         }
1754         spin_unlock(&po->bind_lock);
1755 
1756         release_sock(sk);
1757 
1758         if (pg_vec)
1759                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1760 out:
1761         return err;
1762 }
1763 
1764 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1765 {
1766         struct sock *sk = sock->sk;
1767         struct packet_sock *po = pkt_sk(sk);
1768         unsigned long size;
1769         unsigned long start;
1770         int err = -EINVAL;
1771         int i;
1772 
1773         if (vma->vm_pgoff)
1774                 return -EINVAL;
1775 
1776         size = vma->vm_end - vma->vm_start;
1777 
1778         lock_sock(sk);
1779         if (po->pg_vec == NULL)
1780                 goto out;
1781         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1782                 goto out;
1783 
1784         start = vma->vm_start;
1785         for (i = 0; i < po->pg_vec_len; i++) {
1786                 struct page *page = virt_to_page(po->pg_vec[i]);
1787                 int pg_num;
1788 
1789                 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1790                         err = vm_insert_page(vma, start, page);
1791                         if (unlikely(err))
1792                                 goto out;
1793                         start += PAGE_SIZE;
1794                 }
1795         }
1796         atomic_inc(&po->mapped);
1797         vma->vm_ops = &packet_mmap_ops;
1798         err = 0;
1799 
1800 out:
1801         release_sock(sk);
1802         return err;
1803 }
1804 #endif
1805 
1806 
1807 static const struct proto_ops packet_ops_spkt = {
1808         .family =       PF_PACKET,
1809         .owner =        THIS_MODULE,
1810         .release =      packet_release,
1811         .bind =         packet_bind_spkt,
1812         .connect =      sock_no_connect,
1813         .socketpair =   sock_no_socketpair,
1814         .accept =       sock_no_accept,
1815         .getname =      packet_getname_spkt,
1816         .poll =         datagram_poll,
1817         .ioctl =        packet_ioctl,
1818         .listen =       sock_no_listen,
1819         .shutdown =     sock_no_shutdown,
1820         .setsockopt =   sock_no_setsockopt,
1821         .getsockopt =   sock_no_getsockopt,
1822         .sendmsg =      packet_sendmsg_spkt,
1823         .recvmsg =      packet_recvmsg,
1824         .mmap =         sock_no_mmap,
1825         .sendpage =     sock_no_sendpage,
1826 };
1827 
1828 static const struct proto_ops packet_ops = {
1829         .family =       PF_PACKET,
1830         .owner =        THIS_MODULE,
1831         .release =      packet_release,
1832         .bind =         packet_bind,
1833         .connect =      sock_no_connect,
1834         .socketpair =   sock_no_socketpair,
1835         .accept =       sock_no_accept,
1836         .getname =      packet_getname,
1837         .poll =         packet_poll,
1838         .ioctl =        packet_ioctl,
1839         .listen =       sock_no_listen,
1840         .shutdown =     sock_no_shutdown,
1841         .setsockopt =   packet_setsockopt,
1842         .getsockopt =   packet_getsockopt,
1843         .sendmsg =      packet_sendmsg,
1844         .recvmsg =      packet_recvmsg,
1845         .mmap =         packet_mmap,
1846         .sendpage =     sock_no_sendpage,
1847 };
1848 
1849 static struct net_proto_family packet_family_ops = {
1850         .family =       PF_PACKET,
1851         .create =       packet_create,
1852         .owner  =       THIS_MODULE,
1853 };
1854 
1855 static struct notifier_block packet_netdev_notifier = {
1856         .notifier_call =packet_notifier,
1857 };
1858 
1859 #ifdef CONFIG_PROC_FS
1860 static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
1861 {
1862         struct sock *s;
1863         struct hlist_node *node;
1864 
1865         sk_for_each(s, node, &net->packet.sklist) {
1866                 if (!off--)
1867                         return s;
1868         }
1869         return NULL;
1870 }
1871 
1872 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1873         __acquires(seq_file_net(seq)->packet.sklist_lock)
1874 {
1875         struct net *net = seq_file_net(seq);
1876         read_lock(&net->packet.sklist_lock);
1877         return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
1878 }
1879 
1880 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1881 {
1882         struct net *net = seq_file_net(seq);
1883         ++*pos;
1884         return  (v == SEQ_START_TOKEN)
1885                 ? sk_head(&net->packet.sklist)
1886                 : sk_next((struct sock*)v) ;
1887 }
1888 
1889 static void packet_seq_stop(struct seq_file *seq, void *v)
1890         __releases(seq_file_net(seq)->packet.sklist_lock)
1891 {
1892         struct net *net = seq_file_net(seq);
1893         read_unlock(&net->packet.sklist_lock);
1894 }
1895 
1896 static int packet_seq_show(struct seq_file *seq, void *v)
1897 {
1898         if (v == SEQ_START_TOKEN)
1899                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1900         else {
1901                 struct sock *s = v;
1902                 const struct packet_sock *po = pkt_sk(s);
1903 
1904                 seq_printf(seq,
1905                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1906                            s,
1907                            atomic_read(&s->sk_refcnt),
1908                            s->sk_type,
1909                            ntohs(po->num),
1910                            po->ifindex,
1911                            po->running,
1912                            atomic_read(&s->sk_rmem_alloc),
1913                            sock_i_uid(s),
1914                            sock_i_ino(s) );
1915         }
1916 
1917         return 0;
1918 }
1919 
1920 static const struct seq_operations packet_seq_ops = {
1921         .start  = packet_seq_start,
1922         .next   = packet_seq_next,
1923         .stop   = packet_seq_stop,
1924         .show   = packet_seq_show,
1925 };
1926 
1927 static int packet_seq_open(struct inode *inode, struct file *file)
1928 {
1929         return seq_open_net(inode, file, &packet_seq_ops,
1930                             sizeof(struct seq_net_private));
1931 }
1932 
1933 static const struct file_operations packet_seq_fops = {
1934         .owner          = THIS_MODULE,
1935         .open           = packet_seq_open,
1936         .read           = seq_read,
1937         .llseek         = seq_lseek,
1938         .release        = seq_release_net,
1939 };
1940 
1941 #endif
1942 
1943 static int packet_net_init(struct net *net)
1944 {
1945         rwlock_init(&net->packet.sklist_lock);
1946         INIT_HLIST_HEAD(&net->packet.sklist);
1947 
1948         if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
1949                 return -ENOMEM;
1950 
1951         return 0;
1952 }
1953 
/*
 * Per-namespace teardown: remove the "packet" proc entry that
 * packet_net_init() created for @net.
 */
static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
1958 
1959 static struct pernet_operations packet_net_ops = {
1960         .init = packet_net_init,
1961         .exit = packet_net_exit,
1962 };
1963 
1964 
1965 static void __exit packet_exit(void)
1966 {
1967         unregister_netdevice_notifier(&packet_netdev_notifier);
1968         unregister_pernet_subsys(&packet_net_ops);
1969         sock_unregister(PF_PACKET);
1970         proto_unregister(&packet_proto);
1971 }
1972 
1973 static int __init packet_init(void)
1974 {
1975         int rc = proto_register(&packet_proto, 0);
1976 
1977         if (rc != 0)
1978                 goto out;
1979 
1980         sock_register(&packet_family_ops);
1981         register_pernet_subsys(&packet_net_ops);
1982         register_netdevice_notifier(&packet_netdev_notifier);
1983 out:
1984         return rc;
1985 }
1986 
1987 module_init(packet_init);
1988 module_exit(packet_exit);
1989 MODULE_LICENSE("GPL");
1990 MODULE_ALIAS_NETPROTO(PF_PACKET);
1991 
  This page was automatically generated by the LXR engine.