1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #include "fib_lookup.h"
47
/* Debug trace hook; it expands to nothing, so any use compiles away. */
#define FSprintk(a...)

/*
 * Lock taken for writing around updates of the hash tables below and
 * for reading around the device-hash walk in ip_fib_check_default().
 */
static DEFINE_RWLOCK(fib_info_lock);
/* Buckets of fib_info objects, indexed by fib_info_hashfn(). */
static struct hlist_head *fib_info_hash;
/*
 * Buckets of fib_info objects that carry a preferred source address,
 * indexed by fib_laddr_hashfn(fi->fib_prefsrc).
 */
static struct hlist_head *fib_info_laddrhash;
/*
 * Number of buckets in each of the two tables above.  The hash
 * functions mask with (fib_hash_size - 1).
 */
static unsigned int fib_hash_size;
/* Incremented in fib_create_info(), decremented in free_fib_info(). */
static unsigned int fib_info_cnt;

/*
 * Fixed-size table of nexthops (linked through nh_hash), indexed by
 * fib_devindex_hashfn() applied to the ifindex of the nexthop device.
 */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope, declare the
 * loop index "nhsel" and the cursor "nh" in it, and start a for loop over
 * the nexthops of fi.  The scope must be closed with endfor_nexthops(fi).
 * for_nexthops() yields a const cursor, change_nexthops() a writable one.
 * Because the declarations live in the outer scope, "nhsel" and "nh" are
 * still visible between the end of the loop body and endfor_nexthops().
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Taken (with BHs disabled) around updates of fib_power / nh_power. */
static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
	for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
	for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support the loop runs exactly once, over the first
 * nexthop.  The hope is that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
	for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
	for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
83
84
/*
 * Per-route-type properties.  The table is indexed by route type
 * (r->rtm_type in fib_create_info(), fa->fa_type in fib_semantic_match()):
 *
 *   error - value fib_semantic_match() returns for a matching route of
 *           this type; 0 means the route is usable for forwarding.
 *   scope - fib_create_info() rejects a request whose rtm_scope is
 *           numerically below this value.
 *
 * NOTE(review): the array is sized with RTA_MAX (an attribute-type bound)
 * although it is indexed by route type and only the RTN_* slots below are
 * initialised -- confirm whether RTN_MAX + 1 was intended.
 */
static struct
{
	int error;
	u8 scope;
} fib_props[RTA_MAX + 1] = {
	{
		.error = 0,
		.scope = RT_SCOPE_NOWHERE,
	}, /* RTN_UNSPEC */
	{
		.error = 0,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_UNICAST */
	{
		.error = 0,
		.scope = RT_SCOPE_HOST,
	}, /* RTN_LOCAL */
	{
		.error = 0,
		.scope = RT_SCOPE_LINK,
	}, /* RTN_BROADCAST */
	{
		.error = 0,
		.scope = RT_SCOPE_LINK,
	}, /* RTN_ANYCAST */
	{
		.error = 0,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_MULTICAST */
	{
		.error = -EINVAL,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_BLACKHOLE */
	{
		.error = -EHOSTUNREACH,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_UNREACHABLE */
	{
		.error = -EACCES,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_PROHIBIT */
	{
		.error = -EAGAIN,
		.scope = RT_SCOPE_UNIVERSE,
	}, /* RTN_THROW */
	{
		.error = -EINVAL,
		.scope = RT_SCOPE_NOWHERE,
	}, /* RTN_NAT */
	{
		.error = -EINVAL,
		.scope = RT_SCOPE_NOWHERE,
	}, /* RTN_XRESOLVE */
};
139
140
141 /* Release a nexthop info record */
142
143 void free_fib_info(struct fib_info *fi)
144 {
145 if (fi->fib_dead == 0) {
146 printk("Freeing alive fib_info %p\n", fi);
147 return;
148 }
149 change_nexthops(fi) {
150 if (nh->nh_dev)
151 dev_put(nh->nh_dev);
152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi);
154 fib_info_cnt--;
155 kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160 write_lock(&fib_info_lock);
161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
166 if (!nh->nh_dev)
167 continue;
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
173 write_unlock(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195 }
196
197 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
198 {
199 unsigned int mask = (fib_hash_size - 1);
200 unsigned int val = fi->fib_nhs;
201
202 val ^= fi->fib_protocol;
203 val ^= fi->fib_prefsrc;
204 val ^= fi->fib_priority;
205
206 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
207 }
208
209 static struct fib_info *fib_find_info(const struct fib_info *nfi)
210 {
211 struct hlist_head *head;
212 struct hlist_node *node;
213 struct fib_info *fi;
214 unsigned int hash;
215
216 hash = fib_info_hashfn(nfi);
217 head = &fib_info_hash[hash];
218
219 hlist_for_each_entry(fi, node, head, fib_hash) {
220 if (fi->fib_nhs != nfi->fib_nhs)
221 continue;
222 if (nfi->fib_protocol == fi->fib_protocol &&
223 nfi->fib_prefsrc == fi->fib_prefsrc &&
224 nfi->fib_priority == fi->fib_priority &&
225 memcmp(nfi->fib_metrics, fi->fib_metrics,
226 sizeof(fi->fib_metrics)) == 0 &&
227 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
228 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
229 return fi;
230 }
231
232 return NULL;
233 }
234
235 static inline unsigned int fib_devindex_hashfn(unsigned int val)
236 {
237 unsigned int mask = DEVINDEX_HASHSIZE - 1;
238
239 return (val ^
240 (val >> DEVINDEX_HASHBITS) ^
241 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
242 }
243
244 /* Check, that the gateway is already configured.
245 Used only by redirect accept routine.
246 */
247
248 int ip_fib_check_default(u32 gw, struct net_device *dev)
249 {
250 struct hlist_head *head;
251 struct hlist_node *node;
252 struct fib_nh *nh;
253 unsigned int hash;
254
255 read_lock(&fib_info_lock);
256
257 hash = fib_devindex_hashfn(dev->ifindex);
258 head = &fib_info_devhash[hash];
259 hlist_for_each_entry(nh, node, head, nh_hash) {
260 if (nh->nh_dev == dev &&
261 nh->nh_gw == gw &&
262 !(nh->nh_flags&RTNH_F_DEAD)) {
263 read_unlock(&fib_info_lock);
264 return 0;
265 }
266 }
267
268 read_unlock(&fib_info_lock);
269
270 return -1;
271 }
272
273 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
274 int z, int tb_id,
275 struct nlmsghdr *n, struct netlink_skb_parms *req)
276 {
277 struct sk_buff *skb;
278 u32 pid = req ? req->pid : 0;
279 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
280
281 skb = alloc_skb(size, GFP_KERNEL);
282 if (!skb)
283 return;
284
285 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
286 fa->fa_type, fa->fa_scope, &key, z,
287 fa->fa_tos,
288 fa->fa_info) < 0) {
289 kfree_skb(skb);
290 return;
291 }
292 NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
293 if (n->nlmsg_flags&NLM_F_ECHO)
294 atomic_inc(&skb->users);
295 netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
296 if (n->nlmsg_flags&NLM_F_ECHO)
297 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
298 }
299
300 /* Return the first fib alias matching TOS with
301 * priority less than or equal to PRIO.
302 */
303 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
304 {
305 if (fah) {
306 struct fib_alias *fa;
307 list_for_each_entry(fa, fah, fa_list) {
308 if (fa->fa_tos > tos)
309 continue;
310 if (fa->fa_info->fib_priority >= prio ||
311 fa->fa_tos < tos)
312 return fa;
313 }
314 }
315 return NULL;
316 }
317
318 int fib_detect_death(struct fib_info *fi, int order,
319 struct fib_info **last_resort, int *last_idx, int *dflt)
320 {
321 struct neighbour *n;
322 int state = NUD_NONE;
323
324 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
325 if (n) {
326 state = n->nud_state;
327 neigh_release(n);
328 }
329 if (state==NUD_REACHABLE)
330 return 0;
331 if ((state&NUD_VALID) && order != *dflt)
332 return 0;
333 if ((state&NUD_VALID) ||
334 (*last_idx<0 && order > *dflt)) {
335 *last_resort = fi;
336 *last_idx = order;
337 }
338 return 1;
339 }
340
341 #ifdef CONFIG_IP_ROUTE_MULTIPATH
342
343 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
344 {
345 while (RTA_OK(attr,attrlen)) {
346 if (attr->rta_type == type)
347 return *(u32*)RTA_DATA(attr);
348 attr = RTA_NEXT(attr, attrlen);
349 }
350 return 0;
351 }
352
353 static int
354 fib_count_nexthops(struct rtattr *rta)
355 {
356 int nhs = 0;
357 struct rtnexthop *nhp = RTA_DATA(rta);
358 int nhlen = RTA_PAYLOAD(rta);
359
360 while (nhlen >= (int)sizeof(struct rtnexthop)) {
361 if ((nhlen -= nhp->rtnh_len) < 0)
362 return 0;
363 nhs++;
364 nhp = RTNH_NEXT(nhp);
365 };
366 return nhs;
367 }
368
369 static int
370 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
371 {
372 struct rtnexthop *nhp = RTA_DATA(rta);
373 int nhlen = RTA_PAYLOAD(rta);
374
375 change_nexthops(fi) {
376 int attrlen = nhlen - sizeof(struct rtnexthop);
377 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
378 return -EINVAL;
379 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
380 nh->nh_oif = nhp->rtnh_ifindex;
381 nh->nh_weight = nhp->rtnh_hops + 1;
382 if (attrlen) {
383 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
384 #ifdef CONFIG_NET_CLS_ROUTE
385 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
386 #endif
387 }
388 nhp = RTNH_NEXT(nhp);
389 } endfor_nexthops(fi);
390 return 0;
391 }
392
393 #endif
394
395 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
396 struct fib_info *fi)
397 {
398 #ifdef CONFIG_IP_ROUTE_MULTIPATH
399 struct rtnexthop *nhp;
400 int nhlen;
401 #endif
402
403 if (rta->rta_priority &&
404 *rta->rta_priority != fi->fib_priority)
405 return 1;
406
407 if (rta->rta_oif || rta->rta_gw) {
408 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
409 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
410 return 0;
411 return 1;
412 }
413
414 #ifdef CONFIG_IP_ROUTE_MULTIPATH
415 if (rta->rta_mp == NULL)
416 return 0;
417 nhp = RTA_DATA(rta->rta_mp);
418 nhlen = RTA_PAYLOAD(rta->rta_mp);
419
420 for_nexthops(fi) {
421 int attrlen = nhlen - sizeof(struct rtnexthop);
422 u32 gw;
423
424 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
425 return -EINVAL;
426 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
427 return 1;
428 if (attrlen) {
429 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
430 if (gw && gw != nh->nh_gw)
431 return 1;
432 #ifdef CONFIG_NET_CLS_ROUTE
433 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
434 if (gw && gw != nh->nh_tclassid)
435 return 1;
436 #endif
437 }
438 nhp = RTNH_NEXT(nhp);
439 } endfor_nexthops(fi);
440 #endif
441 return 0;
442 }
443
444
445 /*
446 Picture
447 -------
448
   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
451 a) gateway can be actually local interface address,
452 so that gatewayed route is direct.
453 b) gateway must be on-link address, possibly
454 described not by an ifaddr, but also by a direct route.
455 c) If both gateway and interface are specified, they should not
456 contradict.
457 d) If we use tunnel routes, gateway could be not on-link.
458
459 Attempt to reconcile all of these (alas, self-contradictory) conditions
460 results in pretty ugly and hairy code with obscure logic.
461
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
465 Every prefix is assigned a "scope" value: "host" is local address,
466 "link" is direct route,
467 [ ... "site" ... "interior" ... ]
468 and "universe" is true gateway route with global meaning.
469
470 Every prefix refers to a set of "nexthop"s (gw, oif),
471 where gw must have narrower scope. This recursion stops
472 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
473 which means that gw is forced to be on link.
474
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. For example, as a by-product it
   allows independent exterior and interior routing processes
   to coexist in peace.
479
480 Normally it looks as following.
481
482 {universe prefix} -> (gw, oif) [scope link]
483 |
484 |-> {link prefix} -> (gw, oif) [scope local]
485 |
486 |-> {local prefix} (terminal node)
487 */
488
/*
 * Validate one nexthop @nh of the route described by @r and resolve
 * its output device.
 *
 * Gatewayed nexthop (nh_gw != 0):
 *  - with RTNH_F_PERVASIVE (if configured) it is accepted as is, without
 *    resolving a device;
 *  - with RTNH_F_ONLINK the gateway must be a unicast address, the route
 *    scope must be numerically below RT_SCOPE_LINK and nh_oif must name
 *    an interface that is up; the nexthop gets scope RT_SCOPE_LINK;
 *  - otherwise the gateway is looked up in the FIB and the nexthop
 *    inherits scope, oif and device from the result, which must be a
 *    unicast or local route via a device that is up.
 * Direct nexthop (nh_gw == 0): nh_oif must name an up interface that has
 * an in_device; the nexthop gets scope RT_SCOPE_HOST.
 *
 * Returns 0 on success, in which case (except for the pervasive case)
 * nh->nh_dev is set with a reference taken by dev_hold().  Returns a
 * negative errno on failure.  Note that the -ENETDOWN exit after the FIB
 * lookup leaves nh->nh_dev set and held; fib_create_info() releases it
 * through free_fib_info() on its failure path.
 */
static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			struct net_device *dev;

			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			/*
			 * Look the gateway up with a scope one step past the
			 * route's own scope, but never below RT_SCOPE_LINK.
			 */
			struct flowi fl = { .nl_u = { .ip4_u =
						      { .daddr = nh->nh_gw,
							.scope = r->rtm_scope + 1 } },
					    .oif = nh->nh_oif };

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		/* the lookup result is released on every exit past fib_lookup() */
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		/* these flags are only accepted together with a gateway */
		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
563
564 static inline unsigned int fib_laddr_hashfn(u32 val)
565 {
566 unsigned int mask = (fib_hash_size - 1);
567
568 return (val ^ (val >> 7) ^ (val >> 14)) & mask;
569 }
570
571 static struct hlist_head *fib_hash_alloc(int bytes)
572 {
573 if (bytes <= PAGE_SIZE)
574 return kmalloc(bytes, GFP_KERNEL);
575 else
576 return (struct hlist_head *)
577 __get_free_pages(GFP_KERNEL, get_order(bytes));
578 }
579
580 static void fib_hash_free(struct hlist_head *hash, int bytes)
581 {
582 if (!hash)
583 return;
584
585 if (bytes <= PAGE_SIZE)
586 kfree(hash);
587 else
588 free_pages((unsigned long) hash, get_order(bytes));
589 }
590
591 static void fib_hash_move(struct hlist_head *new_info_hash,
592 struct hlist_head *new_laddrhash,
593 unsigned int new_size)
594 {
595 unsigned int old_size = fib_hash_size;
596 unsigned int i;
597
598 write_lock(&fib_info_lock);
599 fib_hash_size = new_size;
600
601 for (i = 0; i < old_size; i++) {
602 struct hlist_head *head = &fib_info_hash[i];
603 struct hlist_node *node, *n;
604 struct fib_info *fi;
605
606 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
607 struct hlist_head *dest;
608 unsigned int new_hash;
609
610 hlist_del(&fi->fib_hash);
611
612 new_hash = fib_info_hashfn(fi);
613 dest = &new_info_hash[new_hash];
614 hlist_add_head(&fi->fib_hash, dest);
615 }
616 }
617 fib_info_hash = new_info_hash;
618
619 for (i = 0; i < old_size; i++) {
620 struct hlist_head *lhead = &fib_info_laddrhash[i];
621 struct hlist_node *node, *n;
622 struct fib_info *fi;
623
624 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
625 struct hlist_head *ldest;
626 unsigned int new_hash;
627
628 hlist_del(&fi->fib_lhash);
629
630 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
631 ldest = &new_laddrhash[new_hash];
632 hlist_add_head(&fi->fib_lhash, ldest);
633 }
634 }
635 fib_info_laddrhash = new_laddrhash;
636
637 write_unlock(&fib_info_lock);
638 }
639
640 struct fib_info *
641 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
642 const struct nlmsghdr *nlh, int *errp)
643 {
644 int err;
645 struct fib_info *fi = NULL;
646 struct fib_info *ofi;
647 #ifdef CONFIG_IP_ROUTE_MULTIPATH
648 int nhs = 1;
649 #else
650 const int nhs = 1;
651 #endif
652
653 /* Fast check to catch the most weird cases */
654 if (fib_props[r->rtm_type].scope > r->rtm_scope)
655 goto err_inval;
656
657 #ifdef CONFIG_IP_ROUTE_MULTIPATH
658 if (rta->rta_mp) {
659 nhs = fib_count_nexthops(rta->rta_mp);
660 if (nhs == 0)
661 goto err_inval;
662 }
663 #endif
664
665 err = -ENOBUFS;
666 if (fib_info_cnt >= fib_hash_size) {
667 unsigned int new_size = fib_hash_size << 1;
668 struct hlist_head *new_info_hash;
669 struct hlist_head *new_laddrhash;
670 unsigned int bytes;
671
672 if (!new_size)
673 new_size = 1;
674 bytes = new_size * sizeof(struct hlist_head *);
675 new_info_hash = fib_hash_alloc(bytes);
676 new_laddrhash = fib_hash_alloc(bytes);
677 if (!new_info_hash || !new_laddrhash) {
678 fib_hash_free(new_info_hash, bytes);
679 fib_hash_free(new_laddrhash, bytes);
680 } else {
681 memset(new_info_hash, 0, bytes);
682 memset(new_laddrhash, 0, bytes);
683
684 fib_hash_move(new_info_hash, new_laddrhash, new_size);
685 }
686
687 if (!fib_hash_size)
688 goto failure;
689 }
690
691 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
692 if (fi == NULL)
693 goto failure;
694 fib_info_cnt++;
695 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
696
697 fi->fib_protocol = r->rtm_protocol;
698
699 fi->fib_nhs = nhs;
700 change_nexthops(fi) {
701 nh->nh_parent = fi;
702 } endfor_nexthops(fi)
703
704 fi->fib_flags = r->rtm_flags;
705 if (rta->rta_priority)
706 fi->fib_priority = *rta->rta_priority;
707 if (rta->rta_mx) {
708 int attrlen = RTA_PAYLOAD(rta->rta_mx);
709 struct rtattr *attr = RTA_DATA(rta->rta_mx);
710
711 while (RTA_OK(attr, attrlen)) {
712 unsigned flavor = attr->rta_type;
713 if (flavor) {
714 if (flavor > RTAX_MAX)
715 goto err_inval;
716 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
717 }
718 attr = RTA_NEXT(attr, attrlen);
719 }
720 }
721 if (rta->rta_prefsrc)
722 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
723
724 if (rta->rta_mp) {
725 #ifdef CONFIG_IP_ROUTE_MULTIPATH
726 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
727 goto failure;
728 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
729 goto err_inval;
730 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
731 goto err_inval;
732 #ifdef CONFIG_NET_CLS_ROUTE
733 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
734 goto err_inval;
735 #endif
736 #else
737 goto err_inval;
738 #endif
739 } else {
740 struct fib_nh *nh = fi->fib_nh;
741 if (rta->rta_oif)
742 nh->nh_oif = *rta->rta_oif;
743 if (rta->rta_gw)
744 memcpy(&nh->nh_gw, rta->rta_gw, 4);
745 #ifdef CONFIG_NET_CLS_ROUTE
746 if (rta->rta_flow)
747 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
748 #endif
749 nh->nh_flags = r->rtm_flags;
750 #ifdef CONFIG_IP_ROUTE_MULTIPATH
751 nh->nh_weight = 1;
752 #endif
753 }
754
755 if (fib_props[r->rtm_type].error) {
756 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
757 goto err_inval;
758 goto link_it;
759 }
760
761 if (r->rtm_scope > RT_SCOPE_HOST)
762 goto err_inval;
763
764 if (r->rtm_scope == RT_SCOPE_HOST) {
765 struct fib_nh *nh = fi->fib_nh;
766
767 /* Local address is added. */
768 if (nhs != 1 || nh->nh_gw)
769 goto err_inval;
770 nh->nh_scope = RT_SCOPE_NOWHERE;
771 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
772 err = -ENODEV;
773 if (nh->nh_dev == NULL)
774 goto failure;
775 } else {
776 change_nexthops(fi) {
777 if ((err = fib_check_nh(r, fi, nh)) != 0)
778 goto failure;
779 } endfor_nexthops(fi)
780 }
781
782 if (fi->fib_prefsrc) {
783 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
784 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
785 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
786 goto err_inval;
787 }
788
789 link_it:
790 if ((ofi = fib_find_info(fi)) != NULL) {
791 fi->fib_dead = 1;
792 free_fib_info(fi);
793 ofi->fib_treeref++;
794 return ofi;
795 }
796
797 fi->fib_treeref++;
798 atomic_inc(&fi->fib_clntref);
799 write_lock(&fib_info_lock);
800 hlist_add_head(&fi->fib_hash,
801 &fib_info_hash[fib_info_hashfn(fi)]);
802 if (fi->fib_prefsrc) {
803 struct hlist_head *head;
804
805 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
806 hlist_add_head(&fi->fib_lhash, head);
807 }
808 change_nexthops(fi) {
809 struct hlist_head *head;
810 unsigned int hash;
811
812 if (!nh->nh_dev)
813 continue;
814 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
815 head = &fib_info_devhash[hash];
816 hlist_add_head(&nh->nh_hash, head);
817 } endfor_nexthops(fi)
818 write_unlock(&fib_info_lock);
819 return fi;
820
821 err_inval:
822 err = -EINVAL;
823
824 failure:
825 *errp = err;
826 if (fi) {
827 fi->fib_dead = 1;
828 free_fib_info(fi);
829 }
830 return NULL;
831 }
832
/*
 * Pick the first alias on @head that is usable for flow @flp and fill
 * @res from it.
 *
 * An alias is skipped when it has a non-zero TOS different from the
 * flow's, when its scope is numerically below the requested scope, when
 * its fib_info is marked dead, or when it has no live nexthop matching
 * flp->oif (an oif of 0 matches any nexthop).  Every alias that passes
 * the TOS and scope checks is flagged FA_S_ACCESSED.
 *
 * Returns 0 with @res filled in (and a client reference taken on the
 * fib_info), 1 when nothing matched, the negative error from fib_props[]
 * when the first eligible alias is an error-type route, or -EINVAL for
 * an unexpected route type.
 */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				/*
				 * The brace scope opened by for_nexthops() is
				 * only closed by endfor_nexthops() further down,
				 * so nhsel is still visible after the loop body
				 * and tells whether the loop left via break.
				 */
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				/* no usable nexthop: try the next alias */
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			};
		}
		/* only reached for error-type routes (err != 0) */
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}
901
902 /* Find appropriate source address to this destination */
903
904 u32 __fib_res_prefsrc(struct fib_result *res)
905 {
906 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
907 }
908
/*
 * Append one rtnetlink route message describing @fi to @skb.
 *
 * The message carries the rtmsg header built from the arguments and
 * @fi, followed by RTA_DST (when @dst_len is non-zero), RTA_PRIORITY,
 * RTA_FLOW (when configured), the metrics, RTA_PREFSRC, and either
 * RTA_GATEWAY/RTA_OIF for a single nexthop or, with multipath support,
 * an RTA_MULTIPATH attribute holding one rtnexthop per nexthop.
 * Optional attributes are emitted only when the value is non-zero.
 *
 * Returns skb->len on success.  On failure the skb is trimmed back to
 * its length on entry and -1 is returned.
 *
 * NOTE(review): NLMSG_PUT() and RTA_PUT() presumably jump to the
 * nlmsg_failure / rtattr_failure labels when the skb runs out of room;
 * the labels have no other visible users here -- confirm against the
 * macro definitions.
 */
int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;	/* start of this message, for rollback */

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		/* reserve the RTA_MULTIPATH header; its length is patched below */
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			/* weight is stored off by one, mirroring fib_get_nhs() */
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
978
979 #ifndef CONFIG_IP_NOSIOCRT
980
/*
 * Translate an old-style route ioctl request (SIOCADDRT / SIOCDELRT,
 * struct rtentry @r) into the netlink representation: header @nl, route
 * message @rtm and parsed attributes @rta.
 *
 * @rtm and @rta are cleared first.  The pointers stored in @rta refer
 * to fields of @r, to kernel objects (dev->ifindex, ifa->ifa_local) or,
 * for rta_mx, to memory obtained with kmalloc() in this function; this
 * function never frees rta_mx, so the caller presumably has to.
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).
 */
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int plen;
	u32 *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check mask for validity:
	   a) it must be contiguous.
	   b) destination must have all host bits clear.
	   c) if application forgot to set correct family (AF_INET),
	      reject request unless it is absolutely clear i.e.
	      both family and mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = 0;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		/*
		 * The priority is the ioctl metric minus one; rt_pad3 of
		 * the request is reused as storage for it.
		 */
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	/* RT_SCOPE_NOWHERE serves as "not decided yet" until further down */
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" names an address label on device "dev" */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get(dev);
			if (!in_dev)
				return -ENODEV;
			/* restore the full label and use its address as prefsrc */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	/* deletions need none of the remaining checks or metrics */
	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		struct rtattr *rec;
		/* room for an RTA_METRICS header plus up to three 4-byte metrics */
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* the requested MTU minus 40 is stored as advertised MSS */
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* the ioctl value is stored shifted left by three bits */
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}
1120
1121 #endif
1122
/*
   Update FIB if:
   - local address disappeared -> we must delete all the entries
     referring to it.
   - device went down -> we must shutdown all nexthops going via it.

   @local: address that disappeared (0 to skip the address pass);
           every fib_info whose preferred source equals it is marked
           RTNH_F_DEAD.
   @dev:   device that went down (NULL to skip the device pass);
           nexthops via it are marked RTNH_F_DEAD, and a fib_info
           whose nexthops are all dead is marked dead itself.
   @force: when zero, nexthops with scope RT_SCOPE_NOWHERE are left
           alone; when non-zero the comparison scope is -1, so
           presumably no nexthop is exempt.  With multipath support
           and force > 1, a fib_info is marked dead as soon as one
           of its nexthops uses @dev.

   Returns the number of fib_info objects marked dead.  No lock is
   taken on the hash tables here.
 */

int fib_sync_down(u32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			/*
			 * Skip nexthops of other devices sharing the bucket,
			 * and do not process the same fib_info twice in a row.
			 */
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			/* the macro's own "nh" shadows the outer cursor here */
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}
1198
1199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1200
/*
   A dead device comes up: wake up the dead nexthops that use it.
   This only makes sense for multipath routes.

   Does nothing (returns 0) unless @dev has IFF_UP set.  For every
   fib_info with a nexthop on @dev, each dead nexthop whose device is
   @dev is revived, provided the device is up and has an in_device;
   its nh_power is reset to 0.  A fib_info with at least one live
   nexthop afterwards has RTNH_F_DEAD cleared.

   Returns the number of fib_info objects left with a live nexthop.
 */

int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		/*
		 * Skip nexthops of other devices sharing the bucket, and do
		 * not process the same fib_info twice in a row.
		 */
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		/* the macro's own "nh" shadows the outer cursor here */
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}
1257
/*
   Choose the nexthop of a multipath route and store its index in
   res->nh_sel.

   Each live nexthop has a budget nh_power and the route a total
   fib_power.  When the total is used up, every live nexthop's budget is
   refilled from its weight.  A pseudo-random w in [0, fib_power-1] is
   then walked through the live nexthops that still have budget; the
   chosen one and the total each lose one unit.  If no nexthop is live,
   nexthop 0 is selected.  Everything runs under fib_multipath_lock.

   The algorithm is suboptimal, but it provides really
   fair weighted route distribution.
 */

void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		/* budget exhausted: refill every live nexthop from its weight */
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}


	/* w should be random number [0..fi->fib_power-1],
	   it is pretty bad approximation.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
1309 #endif
1310
/* This page was automatically generated by the LXR engine. */