1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
42
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45
46 #include <net/sock.h>
47 #include <net/snmp.h>
48
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58
/*
 * Forward declaration: ip6_output() below calls ip6_fragment() before its
 * definition appears in this file.
 */
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 {
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71 }
72
73 static inline int ip6_output_finish(struct sk_buff *skb)
74 {
75
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
78
79 if (hh) {
80 int hh_alen;
81
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
90
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
92 kfree_skb(skb);
93 return -EINVAL;
94
95 }
96
97 /* dev_loopback_xmit for use with netfilter. */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
99 {
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
105
106 netif_rx(newskb);
107 return 0;
108 }
109
110
111 static int ip6_output2(struct sk_buff *skb)
112 {
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
115
116 skb->protocol = htons(ETH_P_IPV6);
117 skb->dev = dev;
118
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
121
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
126
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
129 */
130 if (newskb)
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
132 newskb->dev,
133 ip6_dev_loopback_xmit);
134
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
137 kfree_skb(skb);
138 return 0;
139 }
140 }
141
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
143 }
144
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
146 }
147
148 int ip6_output(struct sk_buff *skb)
149 {
150 if (skb->len > dst_pmtu(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
152 else
153 return ip6_output2(skb);
154 }
155
#ifdef CONFIG_NETFILTER
/*
 *	Look up a fresh output route for @skb from the addresses and next
 *	header currently in its IPv6 header (plus the bound device of the
 *	owning socket, if any) and install it in place of skb->dst.
 *
 *	Returns 0 on success.  If the lookup yields an error route, the
 *	packet keeps its old route, OUTNOROUTES is counted and -EINVAL is
 *	returned; the caller still owns @skb.
 */
int ip6_route_me_harder(struct sk_buff *skb)
{
	struct ipv6hdr *iph = skb->nh.ipv6h;
	struct sock *sk = skb->sk;
	int oif = sk ? sk->sk_bound_dev_if : 0;
	struct dst_entry *dst;
	struct flowi fl = {
		.oif = oif,
		.proto = iph->nexthdr,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = iph->saddr,
			},
		},
	};

	dst = ip6_route_output(skb->sk, &fl);

	if (!dst->error) {
		/* Drop old route. */
		dst_release(skb->dst);
		skb->dst = dst;
		return 0;
	}

	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	LIMIT_NETDEBUG(
		printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
	dst_release(dst);
	return -EINVAL;
}
#endif
187
/*
 *	Continuation for the LOCAL_OUT hook.  When netfilter is built in and
 *	the packet is flagged NFC_ALTERED, the route is looked up again; if
 *	that fails the packet is freed and -EINVAL returned.  Otherwise the
 *	packet continues via dst_output().
 */
static inline int ip6_maybe_reroute(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
	if ((skb->nfcache & NFC_ALTERED) && ip6_route_me_harder(skb) != 0) {
		kfree_skb(skb);
		return -EINVAL;
	}
#endif /* CONFIG_NETFILTER */
	return dst_output(skb);
}
200
201 /*
202 * xmit an sk_buff (used by TCP)
203 */
204
205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
207 {
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
211 struct ipv6hdr *hdr;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
214 int hlimit;
215 u32 mtu;
216
217 if (opt) {
218 int head_room;
219
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
222 */
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
226
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
229 kfree_skb(skb);
230 skb = skb2;
231 if (skb == NULL) {
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
233 return -ENOBUFS;
234 }
235 if (sk)
236 skb_set_owner_w(skb, sk);
237 }
238 if (opt->opt_flen)
239 ipv6_push_frag_opts(skb, opt, &proto);
240 if (opt->opt_nflen)
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
242 }
243
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
245
246 /*
247 * Fill in the IPv6 header
248 */
249
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
251 hlimit = -1;
252 if (np)
253 hlimit = np->hop_limit;
254 if (hlimit < 0)
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
256
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
260
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
263
264 mtu = dst_pmtu(dst);
265 if ((skb->len <= mtu) || ipfragok) {
266 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
267 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
268 }
269
270 if (net_ratelimit())
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
272 skb->dev = dst->dev;
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
275 kfree_skb(skb);
276 return -EMSGSIZE;
277 }
278
279 /*
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
284 */
285
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
288 int proto, int len)
289 {
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
293
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
296
297 totlen = len + sizeof(struct ipv6hdr);
298
299 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
300 skb->nh.ipv6h = hdr;
301
302 *(u32*)hdr = htonl(0x60000000);
303
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
307
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
310
311 return 0;
312 }
313
314 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
315 {
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
318
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel) {
323 if (last) {
324 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
325 if (skb2)
326 rawv6_rcv(last, skb2);
327 }
328 last = sk;
329 }
330 }
331
332 if (last) {
333 rawv6_rcv(last, skb);
334 read_unlock(&ip6_ra_lock);
335 return 1;
336 }
337 read_unlock(&ip6_ra_lock);
338 return 0;
339 }
340
/*
 *	Continuation passed to the FORWARD netfilter hook by ip6_forward():
 *	hands the packet to dst_output() and returns its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
345
/*
 *	Forward a packet along the route already attached to skb->dst.
 *
 *	In order, the function: refuses to forward when forwarding is
 *	disabled; applies the xfrm forward policy check; hands Router Alert
 *	packets to listening raw sockets; enforces the hop limit; lets xfrm
 *	adjust the route; possibly sends a redirect (or drops packets with
 *	an unsuitable source address); enforces the path MTU; and finally
 *	decrements the hop limit on a private copy of the header before
 *	passing the packet to the FORWARD netfilter hook.
 *
 *	Returns 0 if the packet was consumed by the Router Alert chain,
 *	-ETIMEDOUT if the hop limit was exhausted, -EMSGSIZE if it exceeds
 *	the path MTU, -EINVAL for all other drops, and otherwise the result
 *	of the netfilter hook.  @skb is consumed in every case.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = skb->nh.ipv6h;
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb->ip_summed = CHECKSUM_NONE;

	/*
	 *	We DO NOT do any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not an end-node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		/* opt->ra is the offset of the Router Alert option from the
		 * network header; bytes 2-3 of it form the selector value. */
		u8 *ptr = skb->nh.raw + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		/* Via a gateway: redirect to the gateway (the neighbour's key);
		 * otherwise redirect to the destination itself. */
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
						|IPV6_ADDR_LINKLOCAL)) {
		/* This check is security critical. */
		goto error;
	}

	if (skb->len > dst_pmtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
		IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* Re-read the header pointer after skb_cow() before writing to it. */
	hdr = skb->nh.ipv6h;

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);

error:
	/* "error" additionally counts an address error before dropping. */
	IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
459
460 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
461 {
462 to->pkt_type = from->pkt_type;
463 to->priority = from->priority;
464 to->protocol = from->protocol;
465 to->security = from->security;
466 dst_release(to->dst);
467 to->dst = dst_clone(from->dst);
468 to->dev = from->dev;
469
470 #ifdef CONFIG_NET_SCHED
471 to->tc_index = from->tc_index;
472 #endif
473 #ifdef CONFIG_NETFILTER
474 to->nfmark = from->nfmark;
475 /* Connection association is same as pre-frag packet */
476 to->nfct = from->nfct;
477 nf_conntrack_get(to->nfct);
478 to->nfctinfo = from->nfctinfo;
479 #ifdef CONFIG_BRIDGE_NETFILTER
480 nf_bridge_put(to->nf_bridge);
481 to->nf_bridge = from->nf_bridge;
482 nf_bridge_get(to->nf_bridge);
483 #endif
484 #ifdef CONFIG_NETFILTER_DEBUG
485 to->nf_debug = from->nf_debug;
486 #endif
487 #endif
488 }
489
490 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
491 {
492 u16 offset = sizeof(struct ipv6hdr);
493 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
494 unsigned int packet_len = skb->tail - skb->nh.raw;
495 int found_rhdr = 0;
496 *nexthdr = &skb->nh.ipv6h->nexthdr;
497
498 while (offset + 1 <= packet_len) {
499
500 switch (**nexthdr) {
501
502 case NEXTHDR_HOP:
503 case NEXTHDR_ROUTING:
504 case NEXTHDR_DEST:
505 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
506 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
507 offset += ipv6_optlen(exthdr);
508 *nexthdr = &exthdr->nexthdr;
509 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
510 break;
511 default :
512 return offset;
513 }
514 }
515
516 return offset;
517 }
518
519 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
520 {
521 struct net_device *dev;
522 struct sk_buff *frag;
523 struct rt6_info *rt = (struct rt6_info*)skb->dst;
524 struct ipv6hdr *tmp_hdr;
525 struct frag_hdr *fh;
526 unsigned int mtu, hlen, left, len;
527 u32 frag_id = 0;
528 int ptr, offset = 0, err=0;
529 u8 *prevhdr, nexthdr = 0;
530
531 dev = rt->u.dst.dev;
532 hlen = ip6_find_1stfragopt(skb, &prevhdr);
533 nexthdr = *prevhdr;
534
535 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
536
537 if (skb_shinfo(skb)->frag_list) {
538 int first_len = skb_pagelen(skb);
539
540 if (first_len - hlen > mtu ||
541 ((first_len - hlen) & 7) ||
542 skb_cloned(skb))
543 goto slow_path;
544
545 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
546 /* Correct geometry. */
547 if (frag->len > mtu ||
548 ((frag->len & 7) && frag->next) ||
549 skb_headroom(frag) < hlen)
550 goto slow_path;
551
552 /* Correct socket ownership. */
553 if (frag->sk == NULL)
554 goto slow_path;
555
556 /* Partially cloned skb? */
557 if (skb_shared(frag))
558 goto slow_path;
559 }
560
561 err = 0;
562 offset = 0;
563 frag = skb_shinfo(skb)->frag_list;
564 skb_shinfo(skb)->frag_list = NULL;
565 /* BUILD HEADER */
566
567 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
568 if (!tmp_hdr) {
569 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
570 return -ENOMEM;
571 }
572
573 *prevhdr = NEXTHDR_FRAGMENT;
574 memcpy(tmp_hdr, skb->nh.raw, hlen);
575 __skb_pull(skb, hlen);
576 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
577 skb->nh.raw = __skb_push(skb, hlen);
578 memcpy(skb->nh.raw, tmp_hdr, hlen);
579
580 ipv6_select_ident(skb, fh);
581 fh->nexthdr = nexthdr;
582 fh->reserved = 0;
583 fh->frag_off = htons(IP6_MF);
584 frag_id = fh->identification;
585
586 first_len = skb_pagelen(skb);
587 skb->data_len = first_len - skb_headlen(skb);
588 skb->len = first_len;
589 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
590
591
592 for (;;) {
593 /* Prepare header of the next frame,
594 * before previous one went down. */
595 if (frag) {
596 frag->ip_summed = CHECKSUM_NONE;
597 frag->h.raw = frag->data;
598 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
599 frag->nh.raw = __skb_push(frag, hlen);
600 memcpy(frag->nh.raw, tmp_hdr, hlen);
601 offset += skb->len - hlen - sizeof(struct frag_hdr);
602 fh->nexthdr = nexthdr;
603 fh->reserved = 0;
604 fh->frag_off = htons(offset);
605 if (frag->next != NULL)
606 fh->frag_off |= htons(IP6_MF);
607 fh->identification = frag_id;
608 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
609 ip6_copy_metadata(frag, skb);
610 }
611
612 err = output(skb);
613 if (err || !frag)
614 break;
615
616 skb = frag;
617 frag = skb->next;
618 skb->next = NULL;
619 }
620
621 if (tmp_hdr)
622 kfree(tmp_hdr);
623
624 if (err == 0) {
625 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
626 return 0;
627 }
628
629 while (frag) {
630 skb = frag->next;
631 kfree_skb(frag);
632 frag = skb;
633 }
634
635 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
636 return err;
637 }
638
639 slow_path:
640 left = skb->len - hlen; /* Space per frame */
641 ptr = hlen; /* Where to start from */
642
643 /*
644 * Fragment the datagram.
645 */
646
647 *prevhdr = NEXTHDR_FRAGMENT;
648
649 /*
650 * Keep copying data until we run out.
651 */
652 while(left > 0) {
653 len = left;
654 /* IF: it doesn't fit, use 'mtu' - the data space left */
655 if (len > mtu)
656 len = mtu;
657 /* IF: we are not sending upto and including the packet end
658 then align the next start on an eight byte boundary */
659 if (len < left) {
660 len &= ~7;
661 }
662 /*
663 * Allocate buffer.
664 */
665
666 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
667 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
668 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
669 err = -ENOMEM;
670 goto fail;
671 }
672
673 /*
674 * Set up data on packet
675 */
676
677 ip6_copy_metadata(frag, skb);
678 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
679 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
680 frag->nh.raw = frag->data;
681 fh = (struct frag_hdr*)(frag->data + hlen);
682 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
683
684 /*
685 * Charge the memory for the fragment to any owner
686 * it might possess
687 */
688 if (skb->sk)
689 skb_set_owner_w(frag, skb->sk);
690
691 /*
692 * Copy the packet header into the new buffer.
693 */
694 memcpy(frag->nh.raw, skb->data, hlen);
695
696 /*
697 * Build fragment header.
698 */
699 fh->nexthdr = nexthdr;
700 fh->reserved = 0;
701 if (frag_id) {
702 ipv6_select_ident(skb, fh);
703 frag_id = fh->identification;
704 } else
705 fh->identification = frag_id;
706
707 /*
708 * Copy a block of the IP datagram.
709 */
710 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
711 BUG();
712 left -= len;
713
714 fh->frag_off = htons(offset);
715 if (left > 0)
716 fh->frag_off |= htons(IP6_MF);
717 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
718
719 ptr += len;
720 offset += len;
721
722 /*
723 * Put this fragment into the sending queue.
724 */
725
726 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
727
728 err = output(frag);
729 if (err)
730 goto fail;
731 }
732 kfree_skb(skb);
733 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
734 return err;
735
736 fail:
737 kfree_skb(skb);
738 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
739 return err;
740 }
741
742 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
743 {
744 int err = 0;
745
746 *dst = NULL;
747 if (sk) {
748 struct ipv6_pinfo *np = inet6_sk(sk);
749
750 *dst = sk_dst_check(sk, np->dst_cookie);
751 if (*dst) {
752 struct rt6_info *rt = (struct rt6_info*)*dst;
753
754 /* Yes, checking route validity in not connected
755 case is not very simple. Take into account,
756 that we do not support routing by source, TOS,
757 and MSG_DONTROUTE --ANK (980726)
758
759 1. If route was host route, check that
760 cached destination is current.
761 If it is network route, we still may
762 check its validity using saved pointer
763 to the last used address: daddr_cache.
764 We do not want to save whole address now,
765 (because main consumer of this service
766 is tcp, which has not this problem),
767 so that the last trick works only on connected
768 sockets.
769 2. oif also should be the same.
770 */
771
772 if (((rt->rt6i_dst.plen != 128 ||
773 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
774 && (np->daddr_cache == NULL ||
775 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
776 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
777 dst_release(*dst);
778 *dst = NULL;
779 }
780 }
781 }
782
783 if (*dst == NULL)
784 *dst = ip6_route_output(sk, fl);
785
786 if ((err = (*dst)->error))
787 goto out_err_release;
788
789 if (ipv6_addr_any(&fl->fl6_src)) {
790 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
791
792 if (err) {
793 #if IP6_DEBUG >= 2
794 printk(KERN_DEBUG "ip6_dst_lookup: "
795 "no available source address\n");
796 #endif
797 goto out_err_release;
798 }
799 }
800
801 return 0;
802
803 out_err_release:
804 dst_release(*dst);
805 *dst = NULL;
806 return err;
807 }
808
/*
 *	Queue @length bytes of payload on sk->sk_write_queue, splitting it
 *	into buffers sized so that each can later be sent as one fragment.
 *	The queued buffers are turned into a packet and sent by
 *	ip6_push_pending_frames(), or discarded by
 *	ip6_flush_pending_frames().
 *
 *	@sk:          socket whose write queue and cork state are used
 *	@getfrag:     callback that copies 'len' bytes of payload, starting
 *	              at 'offset' within @from, to 'to'; a negative return
 *	              value makes this function fail with -EFAULT
 *	@from:        opaque cookie passed through to @getfrag
 *	@length:      number of payload bytes to append
 *	@transhdrlen: transport header length included in the first buffer;
 *	              only used when the queue is empty
 *	@hlimit:      hop limit saved in the cork state (first call only)
 *	@opt:         extension header options, may be NULL; copied into the
 *	              cork state on the first call
 *	@fl:          flow, copied into the cork state on the first call
 *	@rt:          route; a reference is taken on the first call
 *	@flags:       MSG_* flags (MSG_PROBE, MSG_MORE, MSG_DONTWAIT are
 *	              examined here)
 *
 *	When the write queue is not empty, @rt, @fl, @opt and the MTU are
 *	taken from the cork state saved by the first call and the
 *	corresponding arguments are ignored.
 *
 *	Returns 0 on success (and immediately for MSG_PROBE) or a negative
 *	errno.  On failure in the copy loop the not-yet-queued byte count is
 *	subtracted from the corked length again and OUTDISCARDS is counted.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
		    unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				/* An existing cork buffer is reused only if it is big enough. */
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Space for the route's header_len and the fragmentable options
		 * is accounted as part of the first buffer's payload. */
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Continuing a corked packet: use the state saved by the first call. */
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment header: IPv6 header plus the non-fragmentable options. */
	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
	/* Largest buffer length that still leaves room for a Fragment header
	 * and keeps the fragment payload a multiple of 8 bytes. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;

	/* Empty queue: enter the loop body directly at the allocation step. */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			/* fraggap: bytes of the previous buffer beyond maxfraglen,
			 * which are moved into the new buffer below. */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* The first buffer (transhdrlen != 0) uses sock_alloc_send_skb();
			 * later ones use sock_wmalloc(), and only while the socket's
			 * write memory is at most twice sk_sndbuf. */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb->nh.raw = data + exthdrlen;
			data += fragheaderlen;
			skb->h.raw = data + exthdrlen;

			if (fraggap) {
				/* Move the overhanging tail of the previous buffer here and
				 * transfer its share of the checksum as well. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				skb_trim(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header space only applies to the first buffer. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: append to the linear area of the buffer. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append to a page fragment, continuing in the
			 * socket's current send page while it has room. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_page, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				/* Charge the new page to the buffer and to the socket. */
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* 'length' now holds the bytes that were not queued. */
	inet->cork.length -= length;
	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1086
1087 int ip6_push_pending_frames(struct sock *sk)
1088 {
1089 struct sk_buff *skb, *tmp_skb;
1090 struct sk_buff **tail_skb;
1091 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1092 struct inet_sock *inet = inet_sk(sk);
1093 struct ipv6_pinfo *np = inet6_sk(sk);
1094 struct ipv6hdr *hdr;
1095 struct ipv6_txoptions *opt = np->cork.opt;
1096 struct rt6_info *rt = np->cork.rt;
1097 struct flowi *fl = &inet->cork.fl;
1098 unsigned char proto = fl->proto;
1099 int err = 0;
1100
1101 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1102 goto out;
1103 tail_skb = &(skb_shinfo(skb)->frag_list);
1104
1105 /* move skb->data to ip header from ext header */
1106 if (skb->data < skb->nh.raw)
1107 __skb_pull(skb, skb->nh.raw - skb->data);
1108 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1109 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1110 *tail_skb = tmp_skb;
1111 tail_skb = &(tmp_skb->next);
1112 skb->len += tmp_skb->len;
1113 skb->data_len += tmp_skb->len;
1114 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1115 skb->truesize += tmp_skb->truesize;
1116 __sock_put(tmp_skb->sk);
1117 tmp_skb->destructor = NULL;
1118 tmp_skb->sk = NULL;
1119 #endif
1120 }
1121
1122 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1123 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1124 if (opt && opt->opt_flen)
1125 ipv6_push_frag_opts(skb, opt, &proto);
1126 if (opt && opt->opt_nflen)
1127 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1128
1129 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1130
1131 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1132
1133 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1134 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1135 else
1136 hdr->payload_len = 0;
1137 hdr->hop_limit = np->cork.hop_limit;
1138 hdr->nexthdr = proto;
1139 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1140 ipv6_addr_copy(&hdr->daddr, final_dst);
1141
1142 skb->dst = dst_clone(&rt->u.dst);
1143 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1144 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1145 if (err) {
1146 if (err > 0)
1147 err = inet->recverr ? net_xmit_errno(err) : 0;
1148 if (err)
1149 goto error;
1150 }
1151
1152 out:
1153 inet->cork.flags &= ~IPCORK_OPT;
1154 if (np->cork.opt) {
1155 kfree(np->cork.opt);
1156 np->cork.opt = NULL;
1157 }
1158 if (np->cork.rt) {
1159 dst_release(&np->cork.rt->u.dst);
1160 np->cork.rt = NULL;
1161 }
1162 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1163 return err;
1164 error:
1165 goto out;
1166 }
1167
1168 void ip6_flush_pending_frames(struct sock *sk)
1169 {
1170 struct inet_sock *inet = inet_sk(sk);
1171 struct ipv6_pinfo *np = inet6_sk(sk);
1172 struct sk_buff *skb;
1173
1174 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1175 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1176 kfree_skb(skb);
1177 }
1178
1179 inet->cork.flags &= ~IPCORK_OPT;
1180
1181 if (np->cork.opt) {
1182 kfree(np->cork.opt);
1183 np->cork.opt = NULL;
1184 }
1185 if (np->cork.rt) {
1186 dst_release(&np->cork.rt->u.dst);
1187 np->cork.rt = NULL;
1188 }
1189 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1190 }
1191
|
This page was automatically generated by the
LXR engine.
|