1 /*
2 * NETLINK Kernel-user communication protocol.
3 *
4 * Authors: Alan Cox <alan@redhat.com>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13 * added netlink_proto_exit
14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15 * use nlk_sk, as sk->protinfo is on a diet 8)
16 *
17 */
18
19 #include <linux/config.h>
20 #include <linux/module.h>
21
22 #include <linux/kernel.h>
23 #include <linux/init.h>
24 #include <linux/major.h>
25 #include <linux/signal.h>
26 #include <linux/sched.h>
27 #include <linux/errno.h>
28 #include <linux/string.h>
29 #include <linux/stat.h>
30 #include <linux/socket.h>
31 #include <linux/un.h>
32 #include <linux/fcntl.h>
33 #include <linux/termios.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/fs.h>
37 #include <linux/slab.h>
38 #include <asm/uaccess.h>
39 #include <linux/skbuff.h>
40 #include <linux/netdevice.h>
41 #include <linux/rtnetlink.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #include <linux/smp_lock.h>
45 #include <linux/notifier.h>
46 #include <linux/security.h>
47 #include <linux/jhash.h>
48 #include <linux/jiffies.h>
49 #include <linux/random.h>
50 #include <linux/bitops.h>
51 #include <linux/mm.h>
52 #include <linux/types.h>
53 #include <net/sock.h>
54 #include <net/scm.h>
55
/* Debug print hook; it expands to nothing, so Nprintk() calls compile away. */
#define Nprintk(a...)

/*
 * When the netlink device driver is configured (built in or as a module),
 * enable the nlk->handler delivery paths guarded by NL_EMULATE_DEV below.
 */
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
#define NL_EMULATE_DEV
#endif
61
/*
 * Per-socket netlink state, hung off sk->sk_protinfo (see nlk_sk()).
 */
struct netlink_opt
{
	/* Address this socket is hashed under; assigned by netlink_insert(). */
	u32			pid;
	/* Multicast group mask the socket listens on; set by netlink_bind(). */
	unsigned int		groups;
	/* Default unicast destination, set by netlink_connect(). */
	u32			dst_pid;
	/* Default multicast destination mask, set by netlink_connect(). */
	unsigned int		dst_groups;
	/* Bit 0: set by netlink_overrun(), cleared by netlink_rcv_wake(). */
	unsigned long		state;
	/* Direct delivery hook, only consulted when NL_EMULATE_DEV is defined. */
	int			(*handler)(int unit, struct sk_buff *skb);
	/* Senders blocked in netlink_attachskb() sleep on this queue. */
	wait_queue_head_t	wait;
	/* Dump in progress, or NULL; guarded by cb_lock. */
	struct netlink_callback	*cb;
	spinlock_t		cb_lock;
	/* Input callback installed by netlink_kernel_create(). */
	void			(*data_ready)(struct sock *sk, int bytes);
};
75
/* Fetch the netlink private data that netlink_create() stored in sk_protinfo. */
#define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo)
77
/*
 * Resizable pid -> socket hash table, one per netlink protocol.
 */
struct nl_pid_hash {
	/* Array of (mask + 1) bucket heads. */
	struct hlist_head *table;
	/* jiffies value after which a same-size reshuffle is allowed again. */
	unsigned long rehash_time;

	/* Bucket index mask; doubled-plus-one whenever the table grows. */
	unsigned int mask;
	/* Incremented on every grow; entries >> shift is the average chain length. */
	unsigned int shift;

	/* Number of hashed sockets. */
	unsigned int entries;
	/* Growth stops once shift would exceed this value. */
	unsigned int max_shift;

	/* Seed for jhash_1word(); re-randomised on every rehash. */
	u32 rnd;
};
90
/*
 * Per-protocol bookkeeping; nl_table is indexed by sk_protocol.
 */
struct netlink_table {
	/* Sockets hashed by pid. */
	struct nl_pid_hash hash;
	/* Sockets bound to at least one multicast group (see netlink_bind()). */
	struct hlist_head mc_list;
	/* NL_NONROOT_* flags tested by netlink_capable(). */
	unsigned int nl_nonroot;
};
96
/* One entry per protocol; NULL until allocated (netlink_kernel_create() checks this). */
static struct netlink_table *nl_table;

/* Writers in netlink_table_grab() sleep here until nl_table_users drops to 0. */
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

/* Protects the pid hashes and the multicast lists. */
static DEFINE_RWLOCK(nl_table_lock);
/* Count of holders registered through netlink_lock_table(). */
static atomic_t nl_table_users = ATOMIC_INIT(0);

/* Notified with NETLINK_URELEASE from netlink_release(). */
static struct notifier_block *netlink_chain;
108
109 static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
110 {
111 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
112 }
113
114 static void netlink_sock_destruct(struct sock *sk)
115 {
116 skb_queue_purge(&sk->sk_receive_queue);
117
118 if (!sock_flag(sk, SOCK_DEAD)) {
119 printk("Freeing alive netlink socket %p\n", sk);
120 return;
121 }
122 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
123 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
124 BUG_TRAP(!nlk_sk(sk)->cb);
125
126 kfree(nlk_sk(sk));
127 }
128
129 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
130 * Look, when several writers sleep and reader wakes them up, all but one
131 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
132 * this, _but_ remember, it adds useless work on UP machines.
133 */
134
135 static void netlink_table_grab(void)
136 {
137 write_lock_bh(&nl_table_lock);
138
139 if (atomic_read(&nl_table_users)) {
140 DECLARE_WAITQUEUE(wait, current);
141
142 add_wait_queue_exclusive(&nl_table_wait, &wait);
143 for(;;) {
144 set_current_state(TASK_UNINTERRUPTIBLE);
145 if (atomic_read(&nl_table_users) == 0)
146 break;
147 write_unlock_bh(&nl_table_lock);
148 schedule();
149 write_lock_bh(&nl_table_lock);
150 }
151
152 __set_current_state(TASK_RUNNING);
153 remove_wait_queue(&nl_table_wait, &wait);
154 }
155 }
156
157 static __inline__ void netlink_table_ungrab(void)
158 {
159 write_unlock_bh(&nl_table_lock);
160 wake_up(&nl_table_wait);
161 }
162
163 static __inline__ void
164 netlink_lock_table(void)
165 {
166 /* read_lock() synchronizes us to netlink_table_grab */
167
168 read_lock(&nl_table_lock);
169 atomic_inc(&nl_table_users);
170 read_unlock(&nl_table_lock);
171 }
172
173 static __inline__ void
174 netlink_unlock_table(void)
175 {
176 if (atomic_dec_and_test(&nl_table_users))
177 wake_up(&nl_table_wait);
178 }
179
180 static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
181 {
182 struct nl_pid_hash *hash = &nl_table[protocol].hash;
183 struct hlist_head *head;
184 struct sock *sk;
185 struct hlist_node *node;
186
187 read_lock(&nl_table_lock);
188 head = nl_pid_hashfn(hash, pid);
189 sk_for_each(sk, node, head) {
190 if (nlk_sk(sk)->pid == pid) {
191 sock_hold(sk);
192 goto found;
193 }
194 }
195 sk = NULL;
196 found:
197 read_unlock(&nl_table_lock);
198 return sk;
199 }
200
201 static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
202 {
203 if (size <= PAGE_SIZE)
204 return kmalloc(size, GFP_ATOMIC);
205 else
206 return (struct hlist_head *)
207 __get_free_pages(GFP_ATOMIC, get_order(size));
208 }
209
210 static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
211 {
212 if (size <= PAGE_SIZE)
213 kfree(table);
214 else
215 free_pages((unsigned long)table, get_order(size));
216 }
217
218 static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
219 {
220 unsigned int omask, mask, shift;
221 size_t osize, size;
222 struct hlist_head *otable, *table;
223 int i;
224
225 omask = mask = hash->mask;
226 osize = size = (mask + 1) * sizeof(*table);
227 shift = hash->shift;
228
229 if (grow) {
230 if (++shift > hash->max_shift)
231 return 0;
232 mask = mask * 2 + 1;
233 size *= 2;
234 }
235
236 table = nl_pid_hash_alloc(size);
237 if (!table)
238 return 0;
239
240 memset(table, 0, size);
241 otable = hash->table;
242 hash->table = table;
243 hash->mask = mask;
244 hash->shift = shift;
245 get_random_bytes(&hash->rnd, sizeof(hash->rnd));
246
247 for (i = 0; i <= omask; i++) {
248 struct sock *sk;
249 struct hlist_node *node, *tmp;
250
251 sk_for_each_safe(sk, node, tmp, &otable[i])
252 __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
253 }
254
255 nl_pid_hash_free(otable, osize);
256 hash->rehash_time = jiffies + 10 * 60 * HZ;
257 return 1;
258 }
259
260 static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
261 {
262 int avg = hash->entries >> hash->shift;
263
264 if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
265 return 1;
266
267 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
268 nl_pid_hash_rehash(hash, 0);
269 return 1;
270 }
271
272 return 0;
273 }
274
/* Forward declaration: netlink_create() needs the ops table defined further down. */
static struct proto_ops netlink_ops;
276
277 static int netlink_insert(struct sock *sk, u32 pid)
278 {
279 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
280 struct hlist_head *head;
281 int err = -EADDRINUSE;
282 struct sock *osk;
283 struct hlist_node *node;
284 int len;
285
286 netlink_table_grab();
287 head = nl_pid_hashfn(hash, pid);
288 len = 0;
289 sk_for_each(osk, node, head) {
290 if (nlk_sk(osk)->pid == pid)
291 break;
292 len++;
293 }
294 if (node)
295 goto err;
296
297 err = -EBUSY;
298 if (nlk_sk(sk)->pid)
299 goto err;
300
301 err = -ENOMEM;
302 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
303 goto err;
304
305 if (len && nl_pid_hash_dilute(hash, len))
306 head = nl_pid_hashfn(hash, pid);
307 hash->entries++;
308 nlk_sk(sk)->pid = pid;
309 sk_add_node(sk, head);
310 err = 0;
311
312 err:
313 netlink_table_ungrab();
314 return err;
315 }
316
317 static void netlink_remove(struct sock *sk)
318 {
319 netlink_table_grab();
320 nl_table[sk->sk_protocol].hash.entries--;
321 sk_del_node_init(sk);
322 if (nlk_sk(sk)->groups)
323 __sk_del_bind_node(sk);
324 netlink_table_ungrab();
325 }
326
327 static int netlink_create(struct socket *sock, int protocol)
328 {
329 struct sock *sk;
330 struct netlink_opt *nlk;
331
332 sock->state = SS_UNCONNECTED;
333
334 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
335 return -ESOCKTNOSUPPORT;
336
337 if (protocol<0 || protocol >= MAX_LINKS)
338 return -EPROTONOSUPPORT;
339
340 sock->ops = &netlink_ops;
341
342 sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1, NULL);
343 if (!sk)
344 return -ENOMEM;
345
346 sock_init_data(sock,sk);
347 sk_set_owner(sk, THIS_MODULE);
348
349 nlk = sk->sk_protinfo = kmalloc(sizeof(*nlk), GFP_KERNEL);
350 if (!nlk) {
351 sk_free(sk);
352 return -ENOMEM;
353 }
354 memset(nlk, 0, sizeof(*nlk));
355
356 spin_lock_init(&nlk->cb_lock);
357 init_waitqueue_head(&nlk->wait);
358 sk->sk_destruct = netlink_sock_destruct;
359
360 sk->sk_protocol = protocol;
361 return 0;
362 }
363
364 static int netlink_release(struct socket *sock)
365 {
366 struct sock *sk = sock->sk;
367 struct netlink_opt *nlk;
368
369 if (!sk)
370 return 0;
371
372 netlink_remove(sk);
373 nlk = nlk_sk(sk);
374
375 spin_lock(&nlk->cb_lock);
376 if (nlk->cb) {
377 nlk->cb->done(nlk->cb);
378 netlink_destroy_callback(nlk->cb);
379 nlk->cb = NULL;
380 __sock_put(sk);
381 }
382 spin_unlock(&nlk->cb_lock);
383
384 /* OK. Socket is unlinked, and, therefore,
385 no new packets will arrive */
386
387 sock_orphan(sk);
388 sock->sk = NULL;
389 wake_up_interruptible_all(&nlk->wait);
390
391 skb_queue_purge(&sk->sk_write_queue);
392
393 if (nlk->pid && !nlk->groups) {
394 struct netlink_notify n = {
395 .protocol = sk->sk_protocol,
396 .pid = nlk->pid,
397 };
398 notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
399 }
400
401 sock_put(sk);
402 return 0;
403 }
404
405 static int netlink_autobind(struct socket *sock)
406 {
407 struct sock *sk = sock->sk;
408 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
409 struct hlist_head *head;
410 struct sock *osk;
411 struct hlist_node *node;
412 s32 pid = current->pid;
413 int err;
414 static s32 rover = -4097;
415
416 retry:
417 cond_resched();
418 netlink_table_grab();
419 head = nl_pid_hashfn(hash, pid);
420 sk_for_each(osk, node, head) {
421 if (nlk_sk(osk)->pid == pid) {
422 /* Bind collision, search negative pid values. */
423 pid = rover--;
424 if (rover > -4097)
425 rover = -4097;
426 netlink_table_ungrab();
427 goto retry;
428 }
429 }
430 netlink_table_ungrab();
431
432 err = netlink_insert(sk, pid);
433 if (err == -EADDRINUSE)
434 goto retry;
435 nlk_sk(sk)->groups = 0;
436 return 0;
437 }
438
439 static inline int netlink_capable(struct socket *sock, unsigned int flag)
440 {
441 return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
442 capable(CAP_NET_ADMIN);
443 }
444
445 static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
446 {
447 struct sock *sk = sock->sk;
448 struct netlink_opt *nlk = nlk_sk(sk);
449 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
450 int err;
451
452 if (nladdr->nl_family != AF_NETLINK)
453 return -EINVAL;
454
455 /* Only superuser is allowed to listen multicasts */
456 if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV))
457 return -EPERM;
458
459 if (nlk->pid) {
460 if (nladdr->nl_pid != nlk->pid)
461 return -EINVAL;
462 } else {
463 err = nladdr->nl_pid ?
464 netlink_insert(sk, nladdr->nl_pid) :
465 netlink_autobind(sock);
466 if (err)
467 return err;
468 }
469
470 if (!nladdr->nl_groups && !nlk->groups)
471 return 0;
472
473 netlink_table_grab();
474 if (nlk->groups && !nladdr->nl_groups)
475 __sk_del_bind_node(sk);
476 else if (!nlk->groups && nladdr->nl_groups)
477 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
478 nlk->groups = nladdr->nl_groups;
479 netlink_table_ungrab();
480
481 return 0;
482 }
483
484 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
485 int alen, int flags)
486 {
487 int err = 0;
488 struct sock *sk = sock->sk;
489 struct netlink_opt *nlk = nlk_sk(sk);
490 struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
491
492 if (addr->sa_family == AF_UNSPEC) {
493 sk->sk_state = NETLINK_UNCONNECTED;
494 nlk->dst_pid = 0;
495 nlk->dst_groups = 0;
496 return 0;
497 }
498 if (addr->sa_family != AF_NETLINK)
499 return -EINVAL;
500
501 /* Only superuser is allowed to send multicasts */
502 if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
503 return -EPERM;
504
505 if (!nlk->pid)
506 err = netlink_autobind(sock);
507
508 if (err == 0) {
509 sk->sk_state = NETLINK_CONNECTED;
510 nlk->dst_pid = nladdr->nl_pid;
511 nlk->dst_groups = nladdr->nl_groups;
512 }
513
514 return err;
515 }
516
517 static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
518 {
519 struct sock *sk = sock->sk;
520 struct netlink_opt *nlk = nlk_sk(sk);
521 struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
522
523 nladdr->nl_family = AF_NETLINK;
524 nladdr->nl_pad = 0;
525 *addr_len = sizeof(*nladdr);
526
527 if (peer) {
528 nladdr->nl_pid = nlk->dst_pid;
529 nladdr->nl_groups = nlk->dst_groups;
530 } else {
531 nladdr->nl_pid = nlk->pid;
532 nladdr->nl_groups = nlk->groups;
533 }
534 return 0;
535 }
536
537 static void netlink_overrun(struct sock *sk)
538 {
539 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
540 sk->sk_err = ENOBUFS;
541 sk->sk_error_report(sk);
542 }
543 }
544
545 static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
546 {
547 int protocol = ssk->sk_protocol;
548 struct sock *sock;
549 struct netlink_opt *nlk;
550
551 sock = netlink_lookup(protocol, pid);
552 if (!sock)
553 return ERR_PTR(-ECONNREFUSED);
554
555 /* Don't bother queuing skb if kernel socket has no input function */
556 nlk = nlk_sk(sock);
557 if ((nlk->pid == 0 && !nlk->data_ready) ||
558 (sock->sk_state == NETLINK_CONNECTED &&
559 nlk->dst_pid != nlk_sk(ssk)->pid)) {
560 sock_put(sock);
561 return ERR_PTR(-ECONNREFUSED);
562 }
563 return sock;
564 }
565
566 struct sock *netlink_getsockbyfilp(struct file *filp)
567 {
568 struct inode *inode = filp->f_dentry->d_inode;
569 struct socket *socket;
570 struct sock *sock;
571
572 if (!inode->i_sock || !(socket = SOCKET_I(inode)))
573 return ERR_PTR(-ENOTSOCK);
574
575 sock = socket->sk;
576 if (sock->sk_family != AF_NETLINK)
577 return ERR_PTR(-EINVAL);
578
579 sock_hold(sock);
580 return sock;
581 }
582
583 /*
584 * Attach a skb to a netlink socket.
585 * The caller must hold a reference to the destination socket. On error, the
586 * reference is dropped. The skb is not send to the destination, just all
587 * all error checks are performed and memory in the queue is reserved.
588 * Return values:
589 * < 0: error. skb freed, reference to sock dropped.
590 * 0: continue
591 * 1: repeat lookup - reference dropped while waiting for socket memory.
592 */
593 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
594 {
595 struct netlink_opt *nlk;
596
597 nlk = nlk_sk(sk);
598
599 #ifdef NL_EMULATE_DEV
600 if (nlk->handler)
601 return 0;
602 #endif
603 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
604 test_bit(0, &nlk->state)) {
605 DECLARE_WAITQUEUE(wait, current);
606 if (!timeo) {
607 if (!nlk->pid)
608 netlink_overrun(sk);
609 sock_put(sk);
610 kfree_skb(skb);
611 return -EAGAIN;
612 }
613
614 __set_current_state(TASK_INTERRUPTIBLE);
615 add_wait_queue(&nlk->wait, &wait);
616
617 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
618 test_bit(0, &nlk->state)) &&
619 !sock_flag(sk, SOCK_DEAD))
620 timeo = schedule_timeout(timeo);
621
622 __set_current_state(TASK_RUNNING);
623 remove_wait_queue(&nlk->wait, &wait);
624 sock_put(sk);
625
626 if (signal_pending(current)) {
627 kfree_skb(skb);
628 return sock_intr_errno(timeo);
629 }
630 return 1;
631 }
632 skb_set_owner_r(skb, sk);
633 return 0;
634 }
635
636 int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
637 {
638 struct netlink_opt *nlk;
639 int len = skb->len;
640
641 nlk = nlk_sk(sk);
642 #ifdef NL_EMULATE_DEV
643 if (nlk->handler) {
644 skb_orphan(skb);
645 len = nlk->handler(protocol, skb);
646 sock_put(sk);
647 return len;
648 }
649 #endif
650
651 skb_queue_tail(&sk->sk_receive_queue, skb);
652 sk->sk_data_ready(sk, len);
653 sock_put(sk);
654 return len;
655 }
656
/*
 * Free @skb and drop one reference on @sk.
 * NOTE(review): the skb is freed before the socket reference is dropped;
 * presumably this matters when the skb is still charged to @sk (see
 * skb_set_owner_r() in netlink_attachskb()) - keep the order.
 */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}
662
663 static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation)
664 {
665 int delta;
666
667 skb_orphan(skb);
668
669 delta = skb->end - skb->tail;
670 if (delta * 2 < skb->truesize)
671 return skb;
672
673 if (skb_shared(skb)) {
674 struct sk_buff *nskb = skb_clone(skb, allocation);
675 if (!nskb)
676 return skb;
677 kfree_skb(skb);
678 skb = nskb;
679 }
680
681 if (!pskb_expand_head(skb, 0, -delta, allocation))
682 skb->truesize -= delta;
683
684 return skb;
685 }
686
687 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
688 {
689 struct sock *sk;
690 int err;
691 long timeo;
692
693 skb = netlink_trim(skb, gfp_any());
694
695 timeo = sock_sndtimeo(ssk, nonblock);
696 retry:
697 sk = netlink_getsockbypid(ssk, pid);
698 if (IS_ERR(sk)) {
699 kfree_skb(skb);
700 return PTR_ERR(sk);
701 }
702 err = netlink_attachskb(sk, skb, nonblock, timeo);
703 if (err == 1)
704 goto retry;
705 if (err)
706 return err;
707
708 return netlink_sendskb(sk, skb, ssk->sk_protocol);
709 }
710
711 static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
712 {
713 struct netlink_opt *nlk = nlk_sk(sk);
714 #ifdef NL_EMULATE_DEV
715 if (nlk->handler) {
716 nlk->handler(sk->sk_protocol, skb);
717 return 0;
718 } else
719 #endif
720 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
721 !test_bit(0, &nlk->state)) {
722 skb_set_owner_r(skb, sk);
723 skb_queue_tail(&sk->sk_receive_queue, skb);
724 sk->sk_data_ready(sk, skb->len);
725 return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
726 }
727 return -1;
728 }
729
/* State threaded through do_one_broadcast() for a single netlink_broadcast(). */
struct netlink_broadcast_data {
	/* Sending socket; never delivered to. */
	struct sock *exclude_sk;
	/* Listeners bound to this pid are skipped. */
	u32 pid;
	/* Destination group mask, matched against each listener's groups. */
	u32 group;
	/* Set once a clone fails; remaining listeners only get an overrun. */
	int failure;
	/* OR of the non-negative netlink_broadcast_deliver() results. */
	int congested;
	/* Set once at least one listener took the message. */
	int delivered;
	/* Allocation flags passed to skb_clone(). */
	int allocation;
	/* skb: original message; skb2: copy prepared for the next listener. */
	struct sk_buff *skb, *skb2;
};
740
741 static inline int do_one_broadcast(struct sock *sk,
742 struct netlink_broadcast_data *p)
743 {
744 struct netlink_opt *nlk = nlk_sk(sk);
745 int val;
746
747 if (p->exclude_sk == sk)
748 goto out;
749
750 if (nlk->pid == p->pid || !(nlk->groups & p->group))
751 goto out;
752
753 if (p->failure) {
754 netlink_overrun(sk);
755 goto out;
756 }
757
758 sock_hold(sk);
759 if (p->skb2 == NULL) {
760 if (atomic_read(&p->skb->users) != 1) {
761 p->skb2 = skb_clone(p->skb, p->allocation);
762 } else {
763 p->skb2 = p->skb;
764 atomic_inc(&p->skb->users);
765 }
766 }
767 if (p->skb2 == NULL) {
768 netlink_overrun(sk);
769 /* Clone failed. Notify ALL listeners. */
770 p->failure = 1;
771 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
772 netlink_overrun(sk);
773 } else {
774 p->congested |= val;
775 p->delivered = 1;
776 p->skb2 = NULL;
777 }
778 sock_put(sk);
779
780 out:
781 return 0;
782 }
783
784 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
785 u32 group, int allocation)
786 {
787 struct netlink_broadcast_data info;
788 struct hlist_node *node;
789 struct sock *sk;
790
791 skb = netlink_trim(skb, allocation);
792
793 info.exclude_sk = ssk;
794 info.pid = pid;
795 info.group = group;
796 info.failure = 0;
797 info.congested = 0;
798 info.delivered = 0;
799 info.allocation = allocation;
800 info.skb = skb;
801 info.skb2 = NULL;
802
803 /* While we sleep in clone, do not allow to change socket list */
804
805 netlink_lock_table();
806
807 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
808 do_one_broadcast(sk, &info);
809
810 netlink_unlock_table();
811
812 if (info.skb2)
813 kfree_skb(info.skb2);
814 kfree_skb(skb);
815
816 if (info.delivered) {
817 if (info.congested && (allocation & __GFP_WAIT))
818 yield();
819 return 0;
820 }
821 if (info.failure)
822 return -ENOBUFS;
823 return -ESRCH;
824 }
825
/* Arguments threaded through do_one_set_err() for a single netlink_set_err(). */
struct netlink_set_err_data {
	/* Socket that must not be flagged. */
	struct sock *exclude_sk;
	/* Sockets bound to this pid are skipped. */
	u32 pid;
	/* Only sockets listening on one of these groups are flagged. */
	u32 group;
	/* Value stored into sk_err of each matching socket. */
	int code;
};
832
833 static inline int do_one_set_err(struct sock *sk,
834 struct netlink_set_err_data *p)
835 {
836 struct netlink_opt *nlk = nlk_sk(sk);
837
838 if (sk == p->exclude_sk)
839 goto out;
840
841 if (nlk->pid == p->pid || !(nlk->groups & p->group))
842 goto out;
843
844 sk->sk_err = p->code;
845 sk->sk_error_report(sk);
846 out:
847 return 0;
848 }
849
850 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
851 {
852 struct netlink_set_err_data info;
853 struct hlist_node *node;
854 struct sock *sk;
855
856 info.exclude_sk = ssk;
857 info.pid = pid;
858 info.group = group;
859 info.code = code;
860
861 read_lock(&nl_table_lock);
862
863 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
864 do_one_set_err(sk, &info);
865
866 read_unlock(&nl_table_lock);
867 }
868
869 static inline void netlink_rcv_wake(struct sock *sk)
870 {
871 struct netlink_opt *nlk = nlk_sk(sk);
872
873 if (!skb_queue_len(&sk->sk_receive_queue))
874 clear_bit(0, &nlk->state);
875 if (!test_bit(0, &nlk->state))
876 wake_up_interruptible(&nlk->wait);
877 }
878
879 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
880 struct msghdr *msg, size_t len)
881 {
882 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
883 struct sock *sk = sock->sk;
884 struct netlink_opt *nlk = nlk_sk(sk);
885 struct sockaddr_nl *addr=msg->msg_name;
886 u32 dst_pid;
887 u32 dst_groups;
888 struct sk_buff *skb;
889 int err;
890 struct scm_cookie scm;
891
892 if (msg->msg_flags&MSG_OOB)
893 return -EOPNOTSUPP;
894
895 if (NULL == siocb->scm)
896 siocb->scm = &scm;
897 err = scm_send(sock, msg, siocb->scm);
898 if (err < 0)
899 return err;
900
901 if (msg->msg_namelen) {
902 if (addr->nl_family != AF_NETLINK)
903 return -EINVAL;
904 dst_pid = addr->nl_pid;
905 dst_groups = addr->nl_groups;
906 if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
907 return -EPERM;
908 } else {
909 dst_pid = nlk->dst_pid;
910 dst_groups = nlk->dst_groups;
911 }
912
913 if (!nlk->pid) {
914 err = netlink_autobind(sock);
915 if (err)
916 goto out;
917 }
918
919 err = -EMSGSIZE;
920 if (len > sk->sk_sndbuf - 32)
921 goto out;
922 err = -ENOBUFS;
923 skb = alloc_skb(len, GFP_KERNEL);
924 if (skb==NULL)
925 goto out;
926
927 NETLINK_CB(skb).pid = nlk->pid;
928 NETLINK_CB(skb).groups = nlk->groups;
929 NETLINK_CB(skb).dst_pid = dst_pid;
930 NETLINK_CB(skb).dst_groups = dst_groups;
931 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
932
933 /* What can I do? Netlink is asynchronous, so that
934 we will have to save current capabilities to
935 check them, when this message will be delivered
936 to corresponding kernel module. --ANK (980802)
937 */
938
939 err = -EFAULT;
940 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
941 kfree_skb(skb);
942 goto out;
943 }
944
945 err = security_netlink_send(sk, skb);
946 if (err) {
947 kfree_skb(skb);
948 goto out;
949 }
950
951 if (dst_groups) {
952 atomic_inc(&skb->users);
953 netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
954 }
955 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
956
957 out:
958 return err;
959 }
960
961 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
962 struct msghdr *msg, size_t len,
963 int flags)
964 {
965 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
966 struct scm_cookie scm;
967 struct sock *sk = sock->sk;
968 struct netlink_opt *nlk = nlk_sk(sk);
969 int noblock = flags&MSG_DONTWAIT;
970 size_t copied;
971 struct sk_buff *skb;
972 int err;
973
974 if (flags&MSG_OOB)
975 return -EOPNOTSUPP;
976
977 copied = 0;
978
979 skb = skb_recv_datagram(sk,flags,noblock,&err);
980 if (skb==NULL)
981 goto out;
982
983 msg->msg_namelen = 0;
984
985 copied = skb->len;
986 if (len < copied) {
987 msg->msg_flags |= MSG_TRUNC;
988 copied = len;
989 }
990
991 skb->h.raw = skb->data;
992 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
993
994 if (msg->msg_name) {
995 struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
996 addr->nl_family = AF_NETLINK;
997 addr->nl_pad = 0;
998 addr->nl_pid = NETLINK_CB(skb).pid;
999 addr->nl_groups = NETLINK_CB(skb).dst_groups;
1000 msg->msg_namelen = sizeof(*addr);
1001 }
1002
1003 if (NULL == siocb->scm) {
1004 memset(&scm, 0, sizeof(scm));
1005 siocb->scm = &scm;
1006 }
1007 siocb->scm->creds = *NETLINK_CREDS(skb);
1008 skb_free_datagram(sk, skb);
1009
1010 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
1011 netlink_dump(sk);
1012
1013 scm_recv(sock, msg, siocb->scm, flags);
1014
1015 out:
1016 netlink_rcv_wake(sk);
1017 return err ? : copied;
1018 }
1019
1020 static void netlink_data_ready(struct sock *sk, int len)
1021 {
1022 struct netlink_opt *nlk = nlk_sk(sk);
1023
1024 if (nlk->data_ready)
1025 nlk->data_ready(sk, len);
1026 netlink_rcv_wake(sk);
1027 }
1028
1029 /*
1030 * We export these functions to other modules. They provide a
1031 * complete set of kernel non-blocking support for message
1032 * queueing.
1033 */
1034
1035 struct sock *
1036 netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
1037 {
1038 struct socket *sock;
1039 struct sock *sk;
1040
1041 if (!nl_table)
1042 return NULL;
1043
1044 if (unit<0 || unit>=MAX_LINKS)
1045 return NULL;
1046
1047 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1048 return NULL;
1049
1050 if (netlink_create(sock, unit) < 0) {
1051 sock_release(sock);
1052 return NULL;
1053 }
1054 sk = sock->sk;
1055 sk->sk_data_ready = netlink_data_ready;
1056 if (input)
1057 nlk_sk(sk)->data_ready = input;
1058
1059 if (netlink_insert(sk, 0)) {
1060 sock_release(sock);
1061 return NULL;
1062 }
1063 return sk;
1064 }
1065
1066 void netlink_set_nonroot(int protocol, unsigned int flags)
1067 {
1068 if ((unsigned int)protocol < MAX_LINKS)
1069 nl_table[protocol].nl_nonroot = flags;
1070 }
1071
1072 static void netlink_destroy_callback(struct netlink_callback *cb)
1073 {
1074 if (cb->skb)
1075 kfree_skb(cb->skb);
1076 kfree(cb);
1077 }
1078
1079 /*
1080 * It looks a bit ugly.
1081 * It would be better to create kernel thread.
1082 */
1083
1084 static int netlink_dump(struct sock *sk)
1085 {
1086 struct netlink_opt *nlk = nlk_sk(sk);
1087 struct netlink_callback *cb;
1088 struct sk_buff *skb;
1089 struct nlmsghdr *nlh;
1090 int len;
1091
1092 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
1093 if (!skb)
1094 return -ENOBUFS;
1095
1096 spin_lock(&nlk->cb_lock);
1097
1098 cb = nlk->cb;
1099 if (cb == NULL) {
1100 spin_unlock(&nlk->cb_lock);
1101 kfree_skb(skb);
1102 return -EINVAL;
1103 }
1104
1105 len = cb->dump(skb, cb);
1106
1107 if (len > 0) {
1108 spin_unlock(&nlk->cb_lock);
1109 skb_queue_tail(&sk->sk_receive_queue, skb);
1110 sk->sk_data_ready(sk, len);
1111 return 0;
1112 }
1113
1114 nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int));
1115 nlh->nlmsg_flags |= NLM_F_MULTI;
1116 memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
1117 skb_queue_tail(&sk->sk_receive_queue, skb);
1118 sk->sk_data_ready(sk, skb->len);
1119
1120 cb->done(cb);
1121 nlk->cb = NULL;
1122 spin_unlock(&nlk->cb_lock);
1123
1124 netlink_destroy_callback(cb);
1125 sock_put(sk);
1126 return 0;
1127 }
1128
/*
 * Start a dump towards the socket that sent @skb: record the @dump and
 * @done callbacks on that socket and produce the first chunk.
 *
 * Returns 0 once the dump is installed, -ENOBUFS when the callback
 * cannot be allocated, -ECONNREFUSED when the requesting socket is gone,
 * or -EBUSY when that socket already has a dump in progress.
 */
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
		       int (*done)(struct netlink_callback*))
{
	struct netlink_callback *cb;
	struct netlink_opt *nlk;
	struct sock *requester;

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb)
		return -ENOBUFS;
	memset(cb, 0, sizeof(*cb));

	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	/* The callback keeps the request skb alive until it is destroyed. */
	atomic_inc(&skb->users);
	cb->skb = skb;

	requester = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
	if (!requester) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(requester);

	spin_lock(&nlk->cb_lock);
	if (nlk->cb != NULL) {
		/* A dump is in progress... */
		spin_unlock(&nlk->cb_lock);
		netlink_destroy_callback(cb);
		sock_put(requester);
		return -EBUSY;
	}
	nlk->cb = cb;
	spin_unlock(&nlk->cb_lock);

	/* The lookup reference stays with the dump until it completes. */
	netlink_dump(requester);
	return 0;
}
1169
1170 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1171 {
1172 struct sk_buff *skb;
1173 struct nlmsghdr *rep;
1174 struct nlmsgerr *errmsg;
1175 int size;
1176
1177 if (err == 0)
1178 size = NLMSG_SPACE(sizeof(struct nlmsgerr));
1179 else
1180 size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
1181
1182 skb = alloc_skb(size, GFP_KERNEL);
1183 if (!skb) {
1184 struct sock *sk;
1185
1186 sk = netlink_lookup(in_skb->sk->sk_protocol,
1187 NETLINK_CB(in_skb).pid);
1188 if (sk) {
1189 sk->sk_err = ENOBUFS;
1190 sk->sk_error_report(sk);
1191 sock_put(sk);
1192 }
1193 return;
1194 }
1195
1196 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1197 NLMSG_ERROR, sizeof(struct nlmsgerr));
1198 errmsg = NLMSG_DATA(rep);
1199 errmsg->error = err;
1200 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
1201 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1202 }
1203
1204
1205 #ifdef CONFIG_PROC_FS
/* Position of the socket last returned by the /proc seq_file iterator. */
struct nl_seq_iter {
	/* Index into nl_table (protocol). */
	int link;
	/* Bucket index within that protocol's pid hash. */
	int hash_idx;
};
1210
1211 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1212 {
1213 struct nl_seq_iter *iter = seq->private;
1214 int i, j;
1215 struct sock *s;
1216 struct hlist_node *node;
1217 loff_t off = 0;
1218
1219 for (i=0; i<MAX_LINKS; i++) {
1220 struct nl_pid_hash *hash = &nl_table[i].hash;
1221
1222 for (j = 0; j <= hash->mask; j++) {
1223 sk_for_each(s, node, &hash->table[j]) {
1224 if (off == pos) {
1225 iter->link = i;
1226 iter->hash_idx = j;
1227 return s;
1228 }
1229 ++off;
1230 }
1231 }
1232 }
1233 return NULL;
1234 }
1235
1236 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1237 {
1238 read_lock(&nl_table_lock);
1239 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1240 }
1241
1242 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1243 {
1244 struct sock *s;
1245 struct nl_seq_iter *iter;
1246 int i, j;
1247
1248 ++*pos;
1249
1250 if (v == SEQ_START_TOKEN)
1251 return netlink_seq_socket_idx(seq, 0);
1252
1253 s = sk_next(v);
1254 if (s)
1255 return s;
1256
1257 iter = seq->private;
1258 i = iter->link;
1259 j = iter->hash_idx + 1;
1260
1261 do {
1262 struct nl_pid_hash *hash = &nl_table[i].hash;
1263
1264 for (; j <= hash->mask; j++) {
1265 s = sk_head(&hash->table[j]);
1266 if (s) {
1267 iter->link = i;
1268 iter->hash_idx = j;
1269 return s;
1270 }
1271 }
1272
1273 j = 0;
1274 } while (++i < MAX_LINKS);
1275
1276 return NULL;
1277 }
1278
1279 static void netlink_seq_stop(struct seq_file *seq, void *v)
1280 {
1281 read_unlock(&nl_table_lock);
1282 }
1283
1284
1285 static int netlink_seq_show(struct seq_file *seq, void *v)
1286 {
1287 if (v == SEQ_START_TOKEN)
1288 seq_puts(seq,
1289 "sk Eth Pid Groups "
1290 "Rmem Wmem Dump Locks\n");
1291 else {
1292 struct sock *s = v;
1293 struct netlink_opt *nlk = nlk_sk(s);
1294
1295 seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
1296 s,
1297 s->sk_protocol,
1298 nlk->pid,
1299 nlk->groups,
1300 atomic_read(&s->sk_rmem_alloc),
1301 atomic_read(&s->sk_wmem_alloc),
1302 nlk->cb,
1303 atomic_read(&s->sk_refcnt)
1304 );
1305
1306 }
1307 return 0;
1308 }
1309
1310 static struct seq_operations netlink_seq_ops = {
1311 .start = netlink_seq_start,
1312 .next = netlink_seq_next,
1313 .stop = netlink_seq_stop,
1314 .show = netlink_seq_show,
1315 };
1316
1317
1318 static int netlink_seq_open(struct inode *inode, struct file *file)
1319 {
1320 struct seq_file *seq;
1321 struct nl_seq_iter *iter;
1322 int err;
1323
1324 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1325 if (!iter)
1326 return -ENOMEM;
1327
1328 err = seq_open(file, &netlink_seq_ops);
1329 if (err) {
1330 kfree(iter);
1331 return err;
1332 }
1333
1334 memset(iter, 0, sizeof(*iter));
1335 seq = file->private_data;
1336 seq->private = iter;
1337 return 0;
1338 }
1339
1340 static struct file_operations netlink_seq_fops = {
1341 .owner = THIS_MODULE,
1342 .open = netlink_seq_open,
1343 .read = seq_read,
1344 .llseek = seq_lseek,
1345 .release = seq_release_private,
1346 };
1347
1348 #endif
1349
1350 int netlink_register_notifier(struct notifier_block *nb)
1351 {
1352 return notifier_chain_register(&netlink_chain, nb);
1353 }
1354
1355 int netlink_unregister_notifier(struct notifier_block *nb)
1356 {
1357 return notifier_chain_unregister(&netlink_chain, nb);
1358 }
1359
1360 static struct proto_ops netlink_ops = {
1361 .family = PF_NETLINK,
1362 .owner = THIS_MODULE,
1363 .release = netlink_release,
1364 .bind = netlink_bind,
1365 .connect = netlink_connect,
1366 .socketpair = sock_no_socketpair,
1367 .accept = sock_no_accept,
1368 .getname = netlink_getname,
1369 .poll = datagram_poll,
1370 .ioctl = sock_no_ioctl,
1371 .listen = sock_no_listen,
1372 .shutdown = sock_no_shutdown,
1373 .setsockopt = sock_no_setsockopt,
1374 .getsockopt = sock_no_getsockopt,
1375 .sendmsg = netlink_sendmsg,
1376 .recvmsg = netlink_recvmsg,
1377 .mmap = sock_no_mmap,
1378 .sendpage = sock_no_sendpage,
1379 };
1380
1381 static struct net_proto_family netlink_family_ops = {
1382 .family = PF_NETLINK,
1383 .create = netlink_create,
1384 .owner = THIS_MODULE, /* for consistency 8) */
1385 };
1386
1387 extern void netlink_skb_parms_too_large(void);
1388
1389 static int __init netlink_proto_init(void)
1390 {
1391 struct sk_buff *dummy_skb;
1392 int i;
1393 unsigned long max;
1394 unsigned int order;
1395
1396 if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
1397 netlink_skb_parms_too_large();
1398
1399 nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
1400 if (!nl_table) {
1401 enomem:
1402 printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
1403 return -ENOMEM;
1404 }
1405
1406 memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
1407
1408 if (num_physpages >= (128 * 1024))
1409 max = num_physpages >> (21 - PAGE_SHIFT);
1410 else
1411 max = num_physpages >> (23 - PAGE_SHIFT);
1412
1413 order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
1414 max = (1UL << order) / sizeof(struct hlist_head);
1415 order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1;
1416
1417 for (i = 0; i < MAX_LINKS; i++) {
1418 struct nl_pid_hash *hash = &nl_table[i].hash;
1419
1420 hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
1421 if (!hash->table) {
1422 while (i-- > 0)
1423 nl_pid_hash_free(nl_table[i].hash.table,
1424 1 * sizeof(*hash->table));
1425 kfree(nl_table);
1426 goto enomem;
1427 }
1428 memset(hash->table, 0, 1 * sizeof(*hash->table));
1429 hash->max_shift = order;
1430 hash->shift = 0;
1431 hash->mask = 0;
1432 hash->rehash_time = jiffies;
1433 }
1434
1435 sock_register(&netlink_family_ops);
1436 #ifdef CONFIG_PROC_FS
1437 proc_net_fops_create("netlink", 0, &netlink_seq_fops);
1438 #endif
1439 /* The netlink device handler may be needed early. */
1440 rtnetlink_init();
1441 return 0;
1442 }
1443
1444 static void __exit netlink_proto_exit(void)
1445 {
1446 sock_unregister(PF_NETLINK);
1447 proc_net_remove("netlink");
1448 kfree(nl_table);
1449 nl_table = NULL;
1450 }
1451
1452 core_initcall(netlink_proto_init);
1453 module_exit(netlink_proto_exit);
1454
1455 MODULE_LICENSE("GPL");
1456
1457 MODULE_ALIAS_NETPROTO(PF_NETLINK);
1458
1459 EXPORT_SYMBOL(netlink_ack);
1460 EXPORT_SYMBOL(netlink_broadcast);
1461 EXPORT_SYMBOL(netlink_dump_start);
1462 EXPORT_SYMBOL(netlink_kernel_create);
1463 EXPORT_SYMBOL(netlink_register_notifier);
1464 EXPORT_SYMBOL(netlink_set_err);
1465 EXPORT_SYMBOL(netlink_set_nonroot);
1466 EXPORT_SYMBOL(netlink_unicast);
1467 EXPORT_SYMBOL(netlink_unregister_notifier);
1468
1469
/* This page was automatically generated by the LXR engine. */