Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              Implementation of the Transmission Control Protocol(TCP).
  7  *
  8  * Version:     $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
  9  *
 10  *              IPv4 specific functions
 11  *
 12  *
 13  *              code split from:
 14  *              linux/ipv4/tcp.c
 15  *              linux/ipv4/tcp_input.c
 16  *              linux/ipv4/tcp_output.c
 17  *
 18  *              See tcp.c for author information
 19  *
 20  *      This program is free software; you can redistribute it and/or
 21  *      modify it under the terms of the GNU General Public License
 22  *      as published by the Free Software Foundation; either version
 23  *      2 of the License, or (at your option) any later version.
 24  */
 25 
 26 /*
 27  * Changes:
 28  *              David S. Miller :       New socket lookup architecture.
 29  *                                      This code is dedicated to John Dyson.
 30  *              David S. Miller :       Change semantics of established hash,
 31  *                                      half is devoted to TIME_WAIT sockets
 32  *                                      and the rest go in the other half.
 33  *              Andi Kleen :            Add support for syncookies and fixed
 34  *                                      some bugs: ip options weren't passed to
 35  *                                      the TCP layer, missed a check for an
 36  *                                      ACK bit.
 37  *              Andi Kleen :            Implemented fast path mtu discovery.
 38  *                                      Fixed many serious bugs in the
 39  *                                      open_request handling and moved
 40  *                                      most of it into the af independent code.
 41  *                                      Added tail drop and some other bugfixes.
  42  *                                      Added new listen semantics.
 43  *              Mike McLagan    :       Routing by source
 44  *      Juan Jose Ciarlante:            ip_dynaddr bits
 45  *              Andi Kleen:             various fixes.
 46  *      Vitaly E. Lavrov        :       Transparent proxy revived after year
 47  *                                      coma.
 48  *      Andi Kleen              :       Fix new listen.
 49  *      Andi Kleen              :       Fix accept error reporting.
 50  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 51  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 52  *                                      a single port at the same time.
 53  */
 54 
 55 #include <linux/config.h>
 56 
 57 #include <linux/types.h>
 58 #include <linux/fcntl.h>
 59 #include <linux/module.h>
 60 #include <linux/random.h>
 61 #include <linux/cache.h>
 62 #include <linux/jhash.h>
 63 #include <linux/init.h>
 64 #include <linux/times.h>
 65 
 66 #include <net/icmp.h>
 67 #include <net/tcp.h>
 68 #include <net/ipv6.h>
 69 #include <net/inet_common.h>
 70 #include <net/xfrm.h>
 71 
 72 #include <linux/inet.h>
 73 #include <linux/ipv6.h>
 74 #include <linux/stddef.h>
 75 #include <linux/proc_fs.h>
 76 #include <linux/seq_file.h>
 77 
 78 extern int sysctl_ip_dynaddr;
 79 int sysctl_tcp_tw_reuse;
 80 int sysctl_tcp_low_latency;
 81 
 82 /* Check TCP sequence numbers in ICMP packets. */
 83 #define ICMP_MIN_LENGTH 8
 84 
 85 /* Socket used for sending RSTs */
 86 static struct socket *tcp_socket;
 87 
 88 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
 89                        struct sk_buff *skb);
 90 
 91 struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
 92         .__tcp_lhash_lock       =       RW_LOCK_UNLOCKED,
 93         .__tcp_lhash_users      =       ATOMIC_INIT(0),
 94         .__tcp_lhash_wait
 95           = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
 96         .__tcp_portalloc_lock   =       SPIN_LOCK_UNLOCKED
 97 };
 98 
 99 /*
100  * This array holds the first and last local port number.
101  * For high-usage systems, use sysctl to change this to
102  * 32768-61000
103  */
104 int sysctl_local_port_range[2] = { 1024, 4999 };
105 int tcp_port_rover = 1024 - 1;
106 
107 static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108                                  __u32 faddr, __u16 fport)
109 {
110         int h = (laddr ^ lport) ^ (faddr ^ fport);
111         h ^= h >> 16;
112         h ^= h >> 8;
113         return h & (tcp_ehash_size - 1);
114 }
115 
116 static __inline__ int tcp_sk_hashfn(struct sock *sk)
117 {
118         struct inet_sock *inet = inet_sk(sk);
119         __u32 laddr = inet->rcv_saddr;
120         __u16 lport = inet->num;
121         __u32 faddr = inet->daddr;
122         __u16 fport = inet->dport;
123 
124         return tcp_hashfn(laddr, lport, faddr, fport);
125 }
126 
127 /* Allocate and initialize a new TCP local port bind bucket.
128  * The bindhash mutex for snum's hash chain must be held here.
129  */
130 struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131                                           unsigned short snum)
132 {
133         struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134                                                       SLAB_ATOMIC);
135         if (tb) {
136                 tb->port = snum;
137                 tb->fastreuse = 0;
138                 INIT_HLIST_HEAD(&tb->owners);
139                 hlist_add_head(&tb->node, &head->chain);
140         }
141         return tb;
142 }
143 
144 /* Caller must hold hashbucket lock for this tb with local BH disabled */
145 void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146 {
147         if (hlist_empty(&tb->owners)) {
148                 __hlist_del(&tb->node);
149                 kmem_cache_free(tcp_bucket_cachep, tb);
150         }
151 }
152 
153 /* Caller must disable local BH processing. */
154 static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155 {
156         struct tcp_bind_hashbucket *head =
157                                 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158         struct tcp_bind_bucket *tb;
159 
160         spin_lock(&head->lock);
161         tb = tcp_sk(sk)->bind_hash;
162         sk_add_bind_node(child, &tb->owners);
163         tcp_sk(child)->bind_hash = tb;
164         spin_unlock(&head->lock);
165 }
166 
/* BH-safe wrapper: disables local bottom halves around
 * __tcp_inherit_port().
 */
inline void tcp_inherit_port(struct sock *sk, struct sock *child)
{
        local_bh_disable();
        __tcp_inherit_port(sk, child);
        local_bh_enable();
}
173 
174 void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175                    unsigned short snum)
176 {
177         inet_sk(sk)->num = snum;
178         sk_add_bind_node(sk, &tb->owners);
179         tcp_sk(sk)->bind_hash = tb;
180 }
181 
182 static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183 {
184         const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185         struct sock *sk2;
186         struct hlist_node *node;
187         int reuse = sk->sk_reuse;
188 
189         sk_for_each_bound(sk2, node, &tb->owners) {
190                 if (sk != sk2 &&
191                     !tcp_v6_ipv6only(sk2) &&
192                     (!sk->sk_bound_dev_if ||
193                      !sk2->sk_bound_dev_if ||
194                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195                         if (!reuse || !sk2->sk_reuse ||
196                             sk2->sk_state == TCP_LISTEN) {
197                                 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198                                 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199                                     sk2_rcv_saddr == sk_rcv_saddr)
200                                         break;
201                         }
202                 }
203         }
204         return node != NULL;
205 }
206 
207 /* Obtain a reference to a local port for the given sock,
208  * if snum is zero it means select any available local port.
209  */
210 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211 {
212         struct tcp_bind_hashbucket *head;
213         struct hlist_node *node;
214         struct tcp_bind_bucket *tb;
215         int ret;
216 
217         local_bh_disable();
218         if (!snum) {
219                 int low = sysctl_local_port_range[0];
220                 int high = sysctl_local_port_range[1];
221                 int remaining = (high - low) + 1;
222                 int rover;
223 
224                 spin_lock(&tcp_portalloc_lock);
225                 rover = tcp_port_rover;
226                 do {
227                         rover++;
228                         if (rover < low || rover > high)
229                                 rover = low;
230                         head = &tcp_bhash[tcp_bhashfn(rover)];
231                         spin_lock(&head->lock);
232                         tb_for_each(tb, node, &head->chain)
233                                 if (tb->port == rover)
234                                         goto next;
235                         break;
236                 next:
237                         spin_unlock(&head->lock);
238                 } while (--remaining > 0);
239                 tcp_port_rover = rover;
240                 spin_unlock(&tcp_portalloc_lock);
241 
242                 /* Exhausted local port range during search? */
243                 ret = 1;
244                 if (remaining <= 0)
245                         goto fail;
246 
247                 /* OK, here is the one we will use.  HEAD is
248                  * non-NULL and we hold it's mutex.
249                  */
250                 snum = rover;
251         } else {
252                 head = &tcp_bhash[tcp_bhashfn(snum)];
253                 spin_lock(&head->lock);
254                 tb_for_each(tb, node, &head->chain)
255                         if (tb->port == snum)
256                                 goto tb_found;
257         }
258         tb = NULL;
259         goto tb_not_found;
260 tb_found:
261         if (!hlist_empty(&tb->owners)) {
262                 if (sk->sk_reuse > 1)
263                         goto success;
264                 if (tb->fastreuse > 0 &&
265                     sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
266                         goto success;
267                 } else {
268                         ret = 1;
269                         if (tcp_bind_conflict(sk, tb))
270                                 goto fail_unlock;
271                 }
272         }
273 tb_not_found:
274         ret = 1;
275         if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
276                 goto fail_unlock;
277         if (hlist_empty(&tb->owners)) {
278                 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
279                         tb->fastreuse = 1;
280                 else
281                         tb->fastreuse = 0;
282         } else if (tb->fastreuse &&
283                    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
284                 tb->fastreuse = 0;
285 success:
286         if (!tcp_sk(sk)->bind_hash)
287                 tcp_bind_hash(sk, tb, snum);
288         BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
289         ret = 0;
290 
291 fail_unlock:
292         spin_unlock(&head->lock);
293 fail:
294         local_bh_enable();
295         return ret;
296 }
297 
298 /* Get rid of any references to a local port held by the
299  * given sock.
300  */
301 static void __tcp_put_port(struct sock *sk)
302 {
303         struct inet_sock *inet = inet_sk(sk);
304         struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
305         struct tcp_bind_bucket *tb;
306 
307         spin_lock(&head->lock);
308         tb = tcp_sk(sk)->bind_hash;
309         __sk_del_bind_node(sk);
310         tcp_sk(sk)->bind_hash = NULL;
311         inet->num = 0;
312         tcp_bucket_destroy(tb);
313         spin_unlock(&head->lock);
314 }
315 
/* BH-safe wrapper: disables local bottom halves around __tcp_put_port(). */
void tcp_put_port(struct sock *sk)
{
        local_bh_disable();
        __tcp_put_port(sk);
        local_bh_enable();
}
322 
323 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
324  * Look, when several writers sleep and reader wakes them up, all but one
325  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
326  * this, _but_ remember, it adds useless work on UP machines (wake up each
327  * exclusive lock release). It should be ifdefed really.
328  */
329 
330 void tcp_listen_wlock(void)
331 {
332         write_lock(&tcp_lhash_lock);
333 
334         if (atomic_read(&tcp_lhash_users)) {
335                 DEFINE_WAIT(wait);
336 
337                 for (;;) {
338                         prepare_to_wait_exclusive(&tcp_lhash_wait,
339                                                 &wait, TASK_UNINTERRUPTIBLE);
340                         if (!atomic_read(&tcp_lhash_users))
341                                 break;
342                         write_unlock_bh(&tcp_lhash_lock);
343                         schedule();
344                         write_lock_bh(&tcp_lhash_lock);
345                 }
346 
347                 finish_wait(&tcp_lhash_wait, &wait);
348         }
349 }
350 
351 static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
352 {
353         struct hlist_head *list;
354         rwlock_t *lock;
355 
356         BUG_TRAP(sk_unhashed(sk));
357         if (listen_possible && sk->sk_state == TCP_LISTEN) {
358                 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
359                 lock = &tcp_lhash_lock;
360                 tcp_listen_wlock();
361         } else {
362                 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
363                 lock = &tcp_ehash[sk->sk_hashent].lock;
364                 write_lock(lock);
365         }
366         __sk_add_node(sk, list);
367         sock_prot_inc_use(sk->sk_prot);
368         write_unlock(lock);
369         if (listen_possible && sk->sk_state == TCP_LISTEN)
370                 wake_up(&tcp_lhash_wait);
371 }
372 
373 static void tcp_v4_hash(struct sock *sk)
374 {
375         if (sk->sk_state != TCP_CLOSE) {
376                 local_bh_disable();
377                 __tcp_v4_hash(sk, 1);
378                 local_bh_enable();
379         }
380 }
381 
382 void tcp_unhash(struct sock *sk)
383 {
384         rwlock_t *lock;
385 
386         if (sk_unhashed(sk))
387                 goto ende;
388 
389         if (sk->sk_state == TCP_LISTEN) {
390                 local_bh_disable();
391                 tcp_listen_wlock();
392                 lock = &tcp_lhash_lock;
393         } else {
394                 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
395                 lock = &head->lock;
396                 write_lock_bh(&head->lock);
397         }
398 
399         if (__sk_del_node_init(sk))
400                 sock_prot_dec_use(sk->sk_prot);
401         write_unlock_bh(lock);
402 
403  ende:
404         if (sk->sk_state == TCP_LISTEN)
405                 wake_up(&tcp_lhash_wait);
406 }
407 
408 /* Don't inline this cruft.  Here are some nice properties to
409  * exploit here.  The BSD API does not allow a listening TCP
410  * to specify the remote port nor the remote address for the
411  * connection.  So always assume those are both wildcarded
412  * during the search since they can never be otherwise.
413  */
414 static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
415                                              unsigned short hnum, int dif)
416 {
417         struct sock *result = NULL, *sk;
418         struct hlist_node *node;
419         int score, hiscore;
420 
421         hiscore=-1;
422         sk_for_each(sk, node, head) {
423                 struct inet_sock *inet = inet_sk(sk);
424 
425                 if (inet->num == hnum && !ipv6_only_sock(sk)) {
426                         __u32 rcv_saddr = inet->rcv_saddr;
427 
428                         score = (sk->sk_family == PF_INET ? 1 : 0);
429                         if (rcv_saddr) {
430                                 if (rcv_saddr != daddr)
431                                         continue;
432                                 score+=2;
433                         }
434                         if (sk->sk_bound_dev_if) {
435                                 if (sk->sk_bound_dev_if != dif)
436                                         continue;
437                                 score+=2;
438                         }
439                         if (score == 5)
440                                 return sk;
441                         if (score > hiscore) {
442                                 hiscore = score;
443                                 result = sk;
444                         }
445                 }
446         }
447         return result;
448 }
449 
450 /* Optimize the common listener case. */
451 static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
452                 unsigned short hnum, int dif)
453 {
454         struct sock *sk = NULL;
455         struct hlist_head *head;
456 
457         read_lock(&tcp_lhash_lock);
458         head = &tcp_listening_hash[tcp_lhashfn(hnum)];
459         if (!hlist_empty(head)) {
460                 struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
461 
462                 if (inet->num == hnum && !sk->sk_node.next &&
463                     (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
464                     (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
465                     !sk->sk_bound_dev_if)
466                         goto sherry_cache;
467                 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
468         }
469         if (sk) {
470 sherry_cache:
471                 sock_hold(sk);
472         }
473         read_unlock(&tcp_lhash_lock);
474         return sk;
475 }
476 
477 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
478  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
479  *
480  * Local BH must be disabled here.
481  */
482 
483 static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
484                                                        u32 daddr, u16 hnum,
485                                                        int dif)
486 {
487         struct tcp_ehash_bucket *head;
488         TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
489         __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
490         struct sock *sk;
491         struct hlist_node *node;
492         /* Optimize here for direct hit, only listening connections can
493          * have wildcards anyways.
494          */
495         int hash = tcp_hashfn(daddr, hnum, saddr, sport);
496         head = &tcp_ehash[hash];
497         read_lock(&head->lock);
498         sk_for_each(sk, node, &head->chain) {
499                 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
500                         goto hit; /* You sunk my battleship! */
501         }
502 
503         /* Must check for a TIME_WAIT'er before going to listener hash. */
504         sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
505                 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
506                         goto hit;
507         }
508         sk = NULL;
509 out:
510         read_unlock(&head->lock);
511         return sk;
512 hit:
513         sock_hold(sk);
514         goto out;
515 }
516 
517 static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
518                                            u32 daddr, u16 hnum, int dif)
519 {
520         struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
521                                                       daddr, hnum, dif);
522 
523         return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
524 }
525 
526 inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
527                                   u16 dport, int dif)
528 {
529         struct sock *sk;
530 
531         local_bh_disable();
532         sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
533         local_bh_enable();
534 
535         return sk;
536 }
537 
538 EXPORT_SYMBOL_GPL(tcp_v4_lookup);
539 
540 static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
541 {
542         return secure_tcp_sequence_number(skb->nh.iph->daddr,
543                                           skb->nh.iph->saddr,
544                                           skb->h.th->dest,
545                                           skb->h.th->source);
546 }
547 
548 /* called with local bh disabled */
549 static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
550                                       struct tcp_tw_bucket **twp)
551 {
552         struct inet_sock *inet = inet_sk(sk);
553         u32 daddr = inet->rcv_saddr;
554         u32 saddr = inet->daddr;
555         int dif = sk->sk_bound_dev_if;
556         TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
557         __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
558         int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
559         struct tcp_ehash_bucket *head = &tcp_ehash[hash];
560         struct sock *sk2;
561         struct hlist_node *node;
562         struct tcp_tw_bucket *tw;
563 
564         write_lock(&head->lock);
565 
566         /* Check TIME-WAIT sockets first. */
567         sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
568                 tw = (struct tcp_tw_bucket *)sk2;
569 
570                 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
571                         struct tcp_sock *tp = tcp_sk(sk);
572 
573                         /* With PAWS, it is safe from the viewpoint
574                            of data integrity. Even without PAWS it
575                            is safe provided sequence spaces do not
576                            overlap i.e. at data rates <= 80Mbit/sec.
577 
578                            Actually, the idea is close to VJ's one,
579                            only timestamp cache is held not per host,
580                            but per port pair and TW bucket is used
581                            as state holder.
582 
583                            If TW bucket has been already destroyed we
584                            fall back to VJ's scheme and use initial
585                            timestamp retrieved from peer table.
586                          */
587                         if (tw->tw_ts_recent_stamp &&
588                             (!twp || (sysctl_tcp_tw_reuse &&
589                                       xtime.tv_sec -
590                                       tw->tw_ts_recent_stamp > 1))) {
591                                 if ((tp->write_seq =
592                                                 tw->tw_snd_nxt + 65535 + 2) == 0)
593                                         tp->write_seq = 1;
594                                 tp->rx_opt.ts_recent       = tw->tw_ts_recent;
595                                 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
596                                 sock_hold(sk2);
597                                 goto unique;
598                         } else
599                                 goto not_unique;
600                 }
601         }
602         tw = NULL;
603 
604         /* And established part... */
605         sk_for_each(sk2, node, &head->chain) {
606                 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
607                         goto not_unique;
608         }
609 
610 unique:
611         /* Must record num and sport now. Otherwise we will see
612          * in hash table socket with a funny identity. */
613         inet->num = lport;
614         inet->sport = htons(lport);
615         sk->sk_hashent = hash;
616         BUG_TRAP(sk_unhashed(sk));
617         __sk_add_node(sk, &head->chain);
618         sock_prot_inc_use(sk->sk_prot);
619         write_unlock(&head->lock);
620 
621         if (twp) {
622                 *twp = tw;
623                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
624         } else if (tw) {
625                 /* Silly. Should hash-dance instead... */
626                 tcp_tw_deschedule(tw);
627                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
628 
629                 tcp_tw_put(tw);
630         }
631 
632         return 0;
633 
634 not_unique:
635         write_unlock(&head->lock);
636         return -EADDRNOTAVAIL;
637 }
638 
639 static inline u32 connect_port_offset(const struct sock *sk)
640 {
641         const struct inet_sock *inet = inet_sk(sk);
642 
643         return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, 
644                                          inet->dport);
645 }
646 
647 /*
648  * Bind a port for a connect operation and hash it.
649  */
650 static inline int tcp_v4_hash_connect(struct sock *sk)
651 {
652         unsigned short snum = inet_sk(sk)->num;
653         struct tcp_bind_hashbucket *head;
654         struct tcp_bind_bucket *tb;
655         int ret;
656 
657         if (!snum) {
658                 int low = sysctl_local_port_range[0];
659                 int high = sysctl_local_port_range[1];
660                 int range = high - low;
661                 int i;
662                 int port;
663                 static u32 hint;
664                 u32 offset = hint + connect_port_offset(sk);
665                 struct hlist_node *node;
666                 struct tcp_tw_bucket *tw = NULL;
667 
668                 local_bh_disable();
669                 for (i = 1; i <= range; i++) {
670                         port = low + (i + offset) % range;
671                         head = &tcp_bhash[tcp_bhashfn(port)];
672                         spin_lock(&head->lock);
673 
674                         /* Does not bother with rcv_saddr checks,
675                          * because the established check is already
676                          * unique enough.
677                          */
678                         tb_for_each(tb, node, &head->chain) {
679                                 if (tb->port == port) {
680                                         BUG_TRAP(!hlist_empty(&tb->owners));
681                                         if (tb->fastreuse >= 0)
682                                                 goto next_port;
683                                         if (!__tcp_v4_check_established(sk,
684                                                                         port,
685                                                                         &tw))
686                                                 goto ok;
687                                         goto next_port;
688                                 }
689                         }
690 
691                         tb = tcp_bucket_create(head, port);
692                         if (!tb) {
693                                 spin_unlock(&head->lock);
694                                 break;
695                         }
696                         tb->fastreuse = -1;
697                         goto ok;
698 
699                 next_port:
700                         spin_unlock(&head->lock);
701                 }
702                 local_bh_enable();
703 
704                 return -EADDRNOTAVAIL;
705 
706 ok:
707                 hint += i;
708 
709                 /* Head lock still held and bh's disabled */
710                 tcp_bind_hash(sk, tb, port);
711                 if (sk_unhashed(sk)) {
712                         inet_sk(sk)->sport = htons(port);
713                         __tcp_v4_hash(sk, 0);
714                 }
715                 spin_unlock(&head->lock);
716 
717                 if (tw) {
718                         tcp_tw_deschedule(tw);
719                         tcp_tw_put(tw);
720                 }
721 
722                 ret = 0;
723                 goto out;
724         }
725 
726         head  = &tcp_bhash[tcp_bhashfn(snum)];
727         tb  = tcp_sk(sk)->bind_hash;
728         spin_lock_bh(&head->lock);
729         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
730                 __tcp_v4_hash(sk, 0);
731                 spin_unlock_bh(&head->lock);
732                 return 0;
733         } else {
734                 spin_unlock(&head->lock);
735                 /* No definite answer... Walk to established hash table */
736                 ret = __tcp_v4_check_established(sk, snum, NULL);
737 out:
738                 local_bh_enable();
739                 return ret;
740         }
741 }
742 
743 /* This will initiate an outgoing connection. */
744 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
745 {
746         struct inet_sock *inet = inet_sk(sk);
747         struct tcp_sock *tp = tcp_sk(sk);
748         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
749         struct rtable *rt;
750         u32 daddr, nexthop;
751         int tmp;
752         int err;
753 
754         if (addr_len < sizeof(struct sockaddr_in))
755                 return -EINVAL;
756 
757         if (usin->sin_family != AF_INET)
758                 return -EAFNOSUPPORT;
759 
760         nexthop = daddr = usin->sin_addr.s_addr;
761         if (inet->opt && inet->opt->srr) {
762                 if (!daddr)
763                         return -EINVAL;
764                 nexthop = inet->opt->faddr;
765         }
766 
767         tmp = ip_route_connect(&rt, nexthop, inet->saddr,
768                                RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
769                                IPPROTO_TCP,
770                                inet->sport, usin->sin_port, sk);
771         if (tmp < 0)
772                 return tmp;
773 
774         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
775                 ip_rt_put(rt);
776                 return -ENETUNREACH;
777         }
778 
779         if (!inet->opt || !inet->opt->srr)
780                 daddr = rt->rt_dst;
781 
782         if (!inet->saddr)
783                 inet->saddr = rt->rt_src;
784         inet->rcv_saddr = inet->saddr;
785 
786         if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
787                 /* Reset inherited state */
788                 tp->rx_opt.ts_recent       = 0;
789                 tp->rx_opt.ts_recent_stamp = 0;
790                 tp->write_seq              = 0;
791         }
792 
793         if (sysctl_tcp_tw_recycle &&
794             !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
795                 struct inet_peer *peer = rt_get_peer(rt);
796 
797                 /* VJ's idea. We save last timestamp seen from
798                  * the destination in peer table, when entering state TIME-WAIT
799                  * and initialize rx_opt.ts_recent from it, when trying new connection.
800                  */
801 
802                 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
803                         tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
804                         tp->rx_opt.ts_recent = peer->tcp_ts;
805                 }
806         }
807 
808         inet->dport = usin->sin_port;
809         inet->daddr = daddr;
810 
811         tp->ext_header_len = 0;
812         if (inet->opt)
813                 tp->ext_header_len = inet->opt->optlen;
814 
815         tp->rx_opt.mss_clamp = 536;
816 
817         /* Socket identity is still unknown (sport may be zero).
818          * However we set state to SYN-SENT and not releasing socket
819          * lock select source port, enter ourselves into the hash tables and
820          * complete initialization after this.
821          */
822         tcp_set_state(sk, TCP_SYN_SENT);
823         err = tcp_v4_hash_connect(sk);
824         if (err)
825                 goto failure;
826 
827         err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
828         if (err)
829                 goto failure;
830 
831         /* OK, now commit destination to socket.  */
832         __sk_dst_set(sk, &rt->u.dst);
833         tcp_v4_setup_caps(sk, &rt->u.dst);
834         tp->ext2_header_len = rt->u.dst.header_len;
835 
836         if (!tp->write_seq)
837                 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
838                                                            inet->daddr,
839                                                            inet->sport,
840                                                            usin->sin_port);
841 
842         inet->id = tp->write_seq ^ jiffies;
843 
844         err = tcp_connect(sk);
845         rt = NULL;
846         if (err)
847                 goto failure;
848 
849         return 0;
850 
851 failure:
852         /* This unhashes the socket and releases the local port, if necessary. */
853         tcp_set_state(sk, TCP_CLOSE);
854         ip_rt_put(rt);
855         sk->sk_route_caps = 0;
856         inet->dport = 0;
857         return err;
858 }
859 
860 static __inline__ int tcp_v4_iif(struct sk_buff *skb)
861 {
862         return ((struct rtable *)skb->dst)->rt_iif;
863 }
864 
865 static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
866 {
867         return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
868 }
869 
870 static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
871                                               struct open_request ***prevp,
872                                               __u16 rport,
873                                               __u32 raddr, __u32 laddr)
874 {
875         struct tcp_listen_opt *lopt = tp->listen_opt;
876         struct open_request *req, **prev;
877 
878         for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
879              (req = *prev) != NULL;
880              prev = &req->dl_next) {
881                 if (req->rmt_port == rport &&
882                     req->af.v4_req.rmt_addr == raddr &&
883                     req->af.v4_req.loc_addr == laddr &&
884                     TCP_INET_FAMILY(req->class->family)) {
885                         BUG_TRAP(!req->sk);
886                         *prevp = prev;
887                         break;
888                 }
889         }
890 
891         return req;
892 }
893 
894 static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
895 {
896         struct tcp_sock *tp = tcp_sk(sk);
897         struct tcp_listen_opt *lopt = tp->listen_opt;
898         u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
899 
900         req->expires = jiffies + TCP_TIMEOUT_INIT;
901         req->retrans = 0;
902         req->sk = NULL;
903         req->dl_next = lopt->syn_table[h];
904 
905         write_lock(&tp->syn_wait_lock);
906         lopt->syn_table[h] = req;
907         write_unlock(&tp->syn_wait_lock);
908 
909         tcp_synq_added(sk);
910 }
911 
912 
913 /*
914  * This routine does path mtu discovery as defined in RFC1191.
915  */
916 static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
917                                      u32 mtu)
918 {
919         struct dst_entry *dst;
920         struct inet_sock *inet = inet_sk(sk);
921         struct tcp_sock *tp = tcp_sk(sk);
922 
923         /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
924          * send out by Linux are always <576bytes so they should go through
925          * unfragmented).
926          */
927         if (sk->sk_state == TCP_LISTEN)
928                 return;
929 
930         /* We don't check in the destentry if pmtu discovery is forbidden
931          * on this route. We just assume that no packet_to_big packets
932          * are send back when pmtu discovery is not active.
933          * There is a small race when the user changes this flag in the
934          * route, but I think that's acceptable.
935          */
936         if ((dst = __sk_dst_check(sk, 0)) == NULL)
937                 return;
938 
939         dst->ops->update_pmtu(dst, mtu);
940 
941         /* Something is about to be wrong... Remember soft error
942          * for the case, if this connection will not able to recover.
943          */
944         if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
945                 sk->sk_err_soft = EMSGSIZE;
946 
947         mtu = dst_pmtu(dst);
948 
949         if (inet->pmtudisc != IP_PMTUDISC_DONT &&
950             tp->pmtu_cookie > mtu) {
951                 tcp_sync_mss(sk, mtu);
952 
953                 /* Resend the TCP packet because it's
954                  * clear that the old packet has been
955                  * dropped. This is the new "fast" path mtu
956                  * discovery.
957                  */
958                 tcp_simple_retransmit(sk);
959         } /* else let the usual retransmit timer handle it */
960 }
961 
962 /*
963  * This routine is called by the ICMP module when it gets some
964  * sort of error condition.  If err < 0 then the socket should
965  * be closed and the error returned to the user.  If err > 0
966  * it's just the icmp type << 8 | icmp code.  After adjustment
967  * header points to the first 8 bytes of the tcp header.  We need
968  * to find the appropriate port.
969  *
970  * The locking strategy used here is very "optimistic". When
971  * someone else accesses the socket the ICMP is just dropped
972  * and for some paths there is no check at all.
973  * A more general error queue to queue errors for later handling
974  * is probably better.
975  *
976  */
977 
978 void tcp_v4_err(struct sk_buff *skb, u32 info)
979 {
980         struct iphdr *iph = (struct iphdr *)skb->data;
981         struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
982         struct tcp_sock *tp;
983         struct inet_sock *inet;
984         int type = skb->h.icmph->type;
985         int code = skb->h.icmph->code;
986         struct sock *sk;
987         __u32 seq;
988         int err;
989 
990         if (skb->len < (iph->ihl << 2) + 8) {
991                 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
992                 return;
993         }
994 
995         sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
996                            th->source, tcp_v4_iif(skb));
997         if (!sk) {
998                 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
999                 return;
1000         }
1001         if (sk->sk_state == TCP_TIME_WAIT) {
1002                 tcp_tw_put((struct tcp_tw_bucket *)sk);
1003                 return;
1004         }
1005 
1006         bh_lock_sock(sk);
1007         /* If too many ICMPs get dropped on busy
1008          * servers this needs to be solved differently.
1009          */
1010         if (sock_owned_by_user(sk))
1011                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
1012 
1013         if (sk->sk_state == TCP_CLOSE)
1014                 goto out;
1015 
1016         tp = tcp_sk(sk);
1017         seq = ntohl(th->seq);
1018         if (sk->sk_state != TCP_LISTEN &&
1019             !between(seq, tp->snd_una, tp->snd_nxt)) {
1020                 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
1021                 goto out;
1022         }
1023 
1024         switch (type) {
1025         case ICMP_SOURCE_QUENCH:
1026                 /* Just silently ignore these. */
1027                 goto out;
1028         case ICMP_PARAMETERPROB:
1029                 err = EPROTO;
1030                 break;
1031         case ICMP_DEST_UNREACH:
1032                 if (code > NR_ICMP_UNREACH)
1033                         goto out;
1034 
1035                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
1036                         if (!sock_owned_by_user(sk))
1037                                 do_pmtu_discovery(sk, iph, info);
1038                         goto out;
1039                 }
1040 
1041                 err = icmp_err_convert[code].errno;
1042                 break;
1043         case ICMP_TIME_EXCEEDED:
1044                 err = EHOSTUNREACH;
1045                 break;
1046         default:
1047                 goto out;
1048         }
1049 
1050         switch (sk->sk_state) {
1051                 struct open_request *req, **prev;
1052         case TCP_LISTEN:
1053                 if (sock_owned_by_user(sk))
1054                         goto out;
1055 
1056                 req = tcp_v4_search_req(tp, &prev, th->dest,
1057                                         iph->daddr, iph->saddr);
1058                 if (!req)
1059                         goto out;
1060 
1061                 /* ICMPs are not backlogged, hence we cannot get
1062                    an established socket here.
1063                  */
1064                 BUG_TRAP(!req->sk);
1065 
1066                 if (seq != req->snt_isn) {
1067                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1068                         goto out;
1069                 }
1070 
1071                 /*
1072                  * Still in SYN_RECV, just remove it silently.
1073                  * There is no good way to pass the error to the newly
1074                  * created socket, and POSIX does not want network
1075                  * errors returned from accept().
1076                  */
1077                 tcp_synq_drop(sk, req, prev);
1078                 goto out;
1079 
1080         case TCP_SYN_SENT:
1081         case TCP_SYN_RECV:  /* Cannot happen.
1082                                It can f.e. if SYNs crossed.
1083                              */
1084                 if (!sock_owned_by_user(sk)) {
1085                         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1086                         sk->sk_err = err;
1087 
1088                         sk->sk_error_report(sk);
1089 
1090                         tcp_done(sk);
1091                 } else {
1092                         sk->sk_err_soft = err;
1093                 }
1094                 goto out;
1095         }
1096 
1097         /* If we've already connected we will keep trying
1098          * until we time out, or the user gives up.
1099          *
1100          * rfc1122 4.2.3.9 allows to consider as hard errors
1101          * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
1102          * but it is obsoleted by pmtu discovery).
1103          *
1104          * Note, that in modern internet, where routing is unreliable
1105          * and in each dark corner broken firewalls sit, sending random
1106          * errors ordered by their masters even this two messages finally lose
1107          * their original sense (even Linux sends invalid PORT_UNREACHs)
1108          *
1109          * Now we are in compliance with RFCs.
1110          *                                                      --ANK (980905)
1111          */
1112 
1113         inet = inet_sk(sk);
1114         if (!sock_owned_by_user(sk) && inet->recverr) {
1115                 sk->sk_err = err;
1116                 sk->sk_error_report(sk);
1117         } else  { /* Only an error on timeout */
1118                 sk->sk_err_soft = err;
1119         }
1120 
1121 out:
1122         bh_unlock_sock(sk);
1123         sock_put(sk);
1124 }
1125 
1126 /* This routine computes an IPv4 TCP checksum. */
1127 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1128                        struct sk_buff *skb)
1129 {
1130         struct inet_sock *inet = inet_sk(sk);
1131 
1132         if (skb->ip_summed == CHECKSUM_HW) {
1133                 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
1134                 skb->csum = offsetof(struct tcphdr, check);
1135         } else {
1136                 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
1137                                          csum_partial((char *)th,
1138                                                       th->doff << 2,
1139                                                       skb->csum));
1140         }
1141 }
1142 
1143 /*
1144  *      This routine will send an RST to the other tcp.
1145  *
1146  *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
1147  *                    for reset.
1148  *      Answer: if a packet caused RST, it is not for a socket
1149  *              existing in our system, if it is matched to a socket,
1150  *              it is just duplicate segment or bug in other side's TCP.
1151  *              So that we build reply only basing on parameters
1152  *              arrived with segment.
1153  *      Exception: precedence violation. We do not implement it in any case.
1154  */
1155 
1156 static void tcp_v4_send_reset(struct sk_buff *skb)
1157 {
1158         struct tcphdr *th = skb->h.th;
1159         struct tcphdr rth;
1160         struct ip_reply_arg arg;
1161 
1162         /* Never send a reset in response to a reset. */
1163         if (th->rst)
1164                 return;
1165 
1166         if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
1167                 return;
1168 
1169         /* Swap the send and the receive. */
1170         memset(&rth, 0, sizeof(struct tcphdr));
1171         rth.dest   = th->source;
1172         rth.source = th->dest;
1173         rth.doff   = sizeof(struct tcphdr) / 4;
1174         rth.rst    = 1;
1175 
1176         if (th->ack) {
1177                 rth.seq = th->ack_seq;
1178         } else {
1179                 rth.ack = 1;
1180                 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
1181                                     skb->len - (th->doff << 2));
1182         }
1183 
1184         memset(&arg, 0, sizeof arg);
1185         arg.iov[0].iov_base = (unsigned char *)&rth;
1186         arg.iov[0].iov_len  = sizeof rth;
1187         arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1188                                       skb->nh.iph->saddr, /*XXX*/
1189                                       sizeof(struct tcphdr), IPPROTO_TCP, 0);
1190         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1191 
1192         ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1193 
1194         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1195         TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1196 }
1197 
1198 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
1199    outside socket context is ugly, certainly. What can I do?
1200  */
1201 
1202 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1203                             u32 win, u32 ts)
1204 {
1205         struct tcphdr *th = skb->h.th;
1206         struct {
1207                 struct tcphdr th;
1208                 u32 tsopt[3];
1209         } rep;
1210         struct ip_reply_arg arg;
1211 
1212         memset(&rep.th, 0, sizeof(struct tcphdr));
1213         memset(&arg, 0, sizeof arg);
1214 
1215         arg.iov[0].iov_base = (unsigned char *)&rep;
1216         arg.iov[0].iov_len  = sizeof(rep.th);
1217         if (ts) {
1218                 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1219                                      (TCPOPT_TIMESTAMP << 8) |
1220                                      TCPOLEN_TIMESTAMP);
1221                 rep.tsopt[1] = htonl(tcp_time_stamp);
1222                 rep.tsopt[2] = htonl(ts);
1223                 arg.iov[0].iov_len = sizeof(rep);
1224         }
1225 
1226         /* Swap the send and the receive. */
1227         rep.th.dest    = th->source;
1228         rep.th.source  = th->dest;
1229         rep.th.doff    = arg.iov[0].iov_len / 4;
1230         rep.th.seq     = htonl(seq);
1231         rep.th.ack_seq = htonl(ack);
1232         rep.th.ack     = 1;
1233         rep.th.window  = htons(win);
1234 
1235         arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1236                                       skb->nh.iph->saddr, /*XXX*/
1237                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
1238         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1239 
1240         ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1241 
1242         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1243 }
1244 
1245 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1246 {
1247         struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1248 
1249         tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1250                         tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1251 
1252         tcp_tw_put(tw);
1253 }
1254 
1255 static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
1256 {
1257         tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
1258                         req->ts_recent);
1259 }
1260 
1261 static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1262                                           struct open_request *req)
1263 {
1264         struct rtable *rt;
1265         struct ip_options *opt = req->af.v4_req.opt;
1266         struct flowi fl = { .oif = sk->sk_bound_dev_if,
1267                             .nl_u = { .ip4_u =
1268                                       { .daddr = ((opt && opt->srr) ?
1269                                                   opt->faddr :
1270                                                   req->af.v4_req.rmt_addr),
1271                                         .saddr = req->af.v4_req.loc_addr,
1272                                         .tos = RT_CONN_FLAGS(sk) } },
1273                             .proto = IPPROTO_TCP,
1274                             .uli_u = { .ports =
1275                                        { .sport = inet_sk(sk)->sport,
1276                                          .dport = req->rmt_port } } };
1277 
1278         if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1279                 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1280                 return NULL;
1281         }
1282         if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1283                 ip_rt_put(rt);
1284                 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1285                 return NULL;
1286         }
1287         return &rt->u.dst;
1288 }
1289 
1290 /*
1291  *      Send a SYN-ACK after having received an ACK.
1292  *      This still operates on a open_request only, not on a big
1293  *      socket.
1294  */
1295 static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1296                               struct dst_entry *dst)
1297 {
1298         int err = -1;
1299         struct sk_buff * skb;
1300 
1301         /* First, grab a route. */
1302         if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1303                 goto out;
1304 
1305         skb = tcp_make_synack(sk, dst, req);
1306 
1307         if (skb) {
1308                 struct tcphdr *th = skb->h.th;
1309 
1310                 th->check = tcp_v4_check(th, skb->len,
1311                                          req->af.v4_req.loc_addr,
1312                                          req->af.v4_req.rmt_addr,
1313                                          csum_partial((char *)th, skb->len,
1314                                                       skb->csum));
1315 
1316                 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1317                                             req->af.v4_req.rmt_addr,
1318                                             req->af.v4_req.opt);
1319                 if (err == NET_XMIT_CN)
1320                         err = 0;
1321         }
1322 
1323 out:
1324         dst_release(dst);
1325         return err;
1326 }
1327 
1328 /*
1329  *      IPv4 open_request destructor.
1330  */
1331 static void tcp_v4_or_free(struct open_request *req)
1332 {
1333         if (req->af.v4_req.opt)
1334                 kfree(req->af.v4_req.opt);
1335 }
1336 
1337 static inline void syn_flood_warning(struct sk_buff *skb)
1338 {
1339         static unsigned long warntime;
1340 
1341         if (time_after(jiffies, (warntime + HZ * 60))) {
1342                 warntime = jiffies;
1343                 printk(KERN_INFO
1344                        "possible SYN flooding on port %d. Sending cookies.\n",
1345                        ntohs(skb->h.th->dest));
1346         }
1347 }
1348 
1349 /*
1350  * Save and compile IPv4 options into the open_request if needed.
1351  */
1352 static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1353                                                      struct sk_buff *skb)
1354 {
1355         struct ip_options *opt = &(IPCB(skb)->opt);
1356         struct ip_options *dopt = NULL;
1357 
1358         if (opt && opt->optlen) {
1359                 int opt_size = optlength(opt);
1360                 dopt = kmalloc(opt_size, GFP_ATOMIC);
1361                 if (dopt) {
1362                         if (ip_options_echo(dopt, skb)) {
1363                                 kfree(dopt);
1364                                 dopt = NULL;
1365                         }
1366                 }
1367         }
1368         return dopt;
1369 }
1370 
/*
 * Upper bound on the number of SYN_RECV entries queued per LISTEN
 * socket. One SYN_RECV entry costs about 80 bytes on a 32-bit machine.
 * A single global counter shared by all sockets would be better, but
 * then some protection against one socket starving all the others
 * would be needed.
 *
 * The default used to be 128. Experiments with real servers showed
 * that this is absolutely not enough even at 100 conn/sec; 256 cures
 * most of the problems. The value is adjusted to 128 on very small
 * machines (<= 32MB of memory) and to 1024 on normal or better ones
 * (>= 256MB). Raising it further requires changing the hash table
 * size.
 */
int sysctl_max_syn_backlog = 256;
1385 
1386 struct or_calltable or_ipv4 = {
1387         .family         =       PF_INET,
1388         .rtx_syn_ack    =       tcp_v4_send_synack,
1389         .send_ack       =       tcp_v4_or_send_ack,
1390         .destructor     =       tcp_v4_or_free,
1391         .send_reset     =       tcp_v4_send_reset,
1392 };
1393 
/*
 * Process an incoming connection request carried in skb for socket sk
 * (presumably the listening socket: it is the one whose SYN queue and
 * accept queue are checked below).
 *
 * An open_request is allocated and filled in from the TCP options and
 * IP addresses of skb, an initial sequence number is chosen, and a
 * SYN-ACK is sent. Unless the request is being answered with a
 * syncookie, it is then added to the SYN queue; in cookie mode the
 * request is freed again once the SYN-ACK has gone out.
 *
 * Returns 0 on every path. Dropped requests are only accounted for in
 * TCP_MIB_ATTEMPTFAILS.
 */
1394 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1395 {
1396         struct tcp_options_received tmp_opt;
1397         struct open_request *req;
1398         __u32 saddr = skb->nh.iph->saddr;
1399         __u32 daddr = skb->nh.iph->daddr;
1400         __u32 isn = TCP_SKB_CB(skb)->when;
1401         struct dst_entry *dst = NULL;
1402 #ifdef CONFIG_SYN_COOKIES
1403         int want_cookie = 0;
1404 #else
1405 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1406 #endif
1407 
1408         /* Never answer SYNs sent to a broadcast or multicast address */
1409         if (((struct rtable *)skb->dst)->rt_flags &
1410             (RTCF_BROADCAST | RTCF_MULTICAST))
1411                 goto drop;
1412 
1413         /* TW buckets are converted to open requests without
1414          * limitations, they conserve resources and peer is
1415          * evidently real one.
1416          */
             /* SYN queue full and no ISN supplied with the skb: answer
              * with a syncookie if syncookies are compiled in and
              * enabled, otherwise drop the request.
              */
1417         if (tcp_synq_is_full(sk) && !isn) {
1418 #ifdef CONFIG_SYN_COOKIES
1419                 if (sysctl_tcp_syncookies) {
1420                         want_cookie = 1;
1421                 } else
1422 #endif
1423                 goto drop;
1424         }
1425 
1426         /* Accept backlog is full. If we have already queued enough
1427          * of warm entries in syn queue, drop request. It is better than
1428          * clogging syn queue with openreqs with exponentially increasing
1429          * timeout.
1430          */
1431         if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1432                 goto drop;
1433 
1434         req = tcp_openreq_alloc();
1435         if (!req)
1436                 goto drop;
1437 
             /* Parse the TCP options of the SYN into a scratch copy. */
1438         tcp_clear_options(&tmp_opt);
1439         tmp_opt.mss_clamp = 536;
1440         tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1441 
1442         tcp_parse_options(skb, &tmp_opt, 0);
1443 
             /* Cookie mode: discard the option state parsed above. */
1444         if (want_cookie) {
1445                 tcp_clear_options(&tmp_opt);
1446                 tmp_opt.saw_tstamp = 0;
1447         }
1448 
1449         if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1450                 /* Some OSes (unknown ones, but I see them on web server, which
1451                  * contains information interesting only for windows'
1452                  * users) do not send their stamp in SYN. It is easy case.
1453                  * We simply do not advertise TS support.
1454                  */
1455                 tmp_opt.saw_tstamp = 0;
1456                 tmp_opt.tstamp_ok  = 0;
1457         }
1458         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1459 
1460         tcp_openreq_init(req, &tmp_opt, skb);
1461 
             /* The request is stored from our point of view: the packet's
              * destination is the local address, its source the remote.
              */
1462         req->af.v4_req.loc_addr = daddr;
1463         req->af.v4_req.rmt_addr = saddr;
1464         req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1465         req->class = &or_ipv4;
1466         if (!want_cookie)
1467                 TCP_ECN_create_request(req, skb->h.th);
1468 
1469         if (want_cookie) {
1470 #ifdef CONFIG_SYN_COOKIES
1471                 syn_flood_warning(skb);
1472 #endif
1473                 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1474         } else if (!isn) {
1475                 struct inet_peer *peer = NULL;
1476 
1477                 /* VJ's idea. We save last timestamp seen
1478                  * from the destination in peer table, when entering
1479                  * state TIME-WAIT, and check against it before
1480                  * accepting new connection request.
1481                  *
1482                  * If "isn" is not zero, this request hit alive
1483                  * timewait bucket, so that all the necessary checks
1484                  * are made in the function processing timewait state.
1485                  */
1486                 if (tmp_opt.saw_tstamp &&
1487                     sysctl_tcp_tw_recycle &&
1488                     (dst = tcp_v4_route_req(sk, req)) != NULL &&
1489                     (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1490                     peer->v4daddr == saddr) {
1491                         if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1492                             (s32)(peer->tcp_ts - req->ts_recent) >
1493                                                         TCP_PAWS_WINDOW) {
1494                                 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1495                                 dst_release(dst);
1496                                 goto drop_and_free;
1497                         }
1498                 }
1499                 /* Kill the following clause, if you dislike this way. */
1500                 else if (!sysctl_tcp_syncookies &&
1501                          (sysctl_max_syn_backlog - tcp_synq_len(sk) <
1502                           (sysctl_max_syn_backlog >> 2)) &&
1503                          (!peer || !peer->tcp_ts_stamp) &&
1504                          (!dst || !dst_metric(dst, RTAX_RTT))) {
1505                         /* Without syncookies last quarter of
1506                          * backlog is filled with destinations,
1507                          * proven to be alive.
1508                          * It means that we continue to communicate
1509                          * to destinations, already remembered
1510                          * to the moment of synflood.
1511                          */
1512                         NETDEBUG(if (net_ratelimit()) \
1513                                         printk(KERN_DEBUG "TCP: drop open "
1514                                                           "request from %u.%u."
1515                                                           "%u.%u/%u\n", \
1516                                                NIPQUAD(saddr),
1517                                                ntohs(skb->h.th->source)));
1518                         dst_release(dst);
1519                         goto drop_and_free;
1520                 }
1521 
1522                 isn = tcp_v4_init_sequence(sk, skb);
1523         }
1524         req->snt_isn = isn;
1525 
             /* dst may still be NULL here; tcp_v4_send_synack() then looks
              * up a route itself, and it releases dst in either case.
              */
1526         if (tcp_v4_send_synack(sk, req, dst))
1527                 goto drop_and_free;
1528 
             /* In cookie mode the request is not queued: free it now. */
1529         if (want_cookie) {
1530                 tcp_openreq_free(req);
1531         } else {
1532                 tcp_v4_synq_add(sk, req);
1533         }
1534         return 0;
1535 
1536 drop_and_free:
1537         tcp_openreq_free(req);
1538 drop:
1539         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1540         return 0;
1541 }
1542 
1543 
1544 /*
1545  * The three way handshake has completed - we got a valid synack -
1546  * now create the new socket.
1547  */
1548 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1549                                   struct open_request *req,
1550                                   struct dst_entry *dst)
1551 {
1552         struct inet_sock *newinet;
1553         struct tcp_sock *newtp;
1554         struct sock *newsk;
1555 
1556         if (sk_acceptq_is_full(sk))
1557                 goto exit_overflow;
1558 
1559         if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1560                 goto exit;
1561 
1562         newsk = tcp_create_openreq_child(sk, req, skb);
1563         if (!newsk)
1564                 goto exit;
1565 
1566         newsk->sk_dst_cache = dst;
1567         tcp_v4_setup_caps(newsk, dst);
1568 
1569         newtp                 = tcp_sk(newsk);
1570         newinet               = inet_sk(newsk);
1571         newinet->daddr        = req->af.v4_req.rmt_addr;
1572         newinet->rcv_saddr    = req->af.v4_req.loc_addr;
1573         newinet->saddr        = req->af.v4_req.loc_addr;
1574         newinet->opt          = req->af.v4_req.opt;
1575         req->af.v4_req.opt    = NULL;
1576         newinet->mc_index     = tcp_v4_iif(skb);
1577         newinet->mc_ttl       = skb->nh.iph->ttl;
1578         newtp->ext_header_len = 0;
1579         if (newinet->opt)
1580                 newtp->ext_header_len = newinet->opt->optlen;
1581         newtp->ext2_header_len = dst->header_len;
1582         newinet->id = newtp->write_seq ^ jiffies;
1583 
1584         tcp_sync_mss(newsk, dst_pmtu(dst));
1585         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1586         tcp_initialize_rcv_mss(newsk);
1587 
1588         __tcp_v4_hash(newsk, 0);
1589         __tcp_inherit_port(sk, newsk);
1590 
1591         return newsk;
1592 
1593 exit_overflow:
1594         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1595 exit:
1596         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1597         dst_release(dst);
1598         return NULL;
1599 }
1600 
/*
 * Map a segment received on listening socket sk to the socket that should
 * handle it.
 *
 * Return value:
 *   - whatever tcp_check_req() returns, when a pending open request matches
 *     the segment's source port and address pair;
 *   - a matching established socket, returned with bh_lock_sock() taken;
 *   - NULL when the match is a TIME_WAIT bucket (tcp_v4_do_rcv() then
 *     discards the segment);
 *   - otherwise sk itself or, under CONFIG_SYN_COOKIES, the result of
 *     cookie_v4_check() for a segment with ACK set and neither RST nor SYN.
 */
1601 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1602 {
1603         struct tcphdr *th = skb->h.th;
1604         struct iphdr *iph = skb->nh.iph;
1605         struct tcp_sock *tp = tcp_sk(sk);
1606         struct sock *nsk;
1607         struct open_request **prev;
1608         /* Find possible connection requests. */
1609         struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
1610                                                      iph->saddr, iph->daddr);
1611         if (req)
1612                 return tcp_check_req(sk, skb, req, prev);
1613 
             /* No pending request: look for an already established connection. */
1614         nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1615                                           th->source,
1616                                           skb->nh.iph->daddr,
1617                                           ntohs(th->dest),
1618                                           tcp_v4_iif(skb));
1619 
1620         if (nsk) {
1621                 if (nsk->sk_state != TCP_TIME_WAIT) {
                             /* Hand the established socket back locked. */
1622                         bh_lock_sock(nsk);
1623                         return nsk;
1624                 }
                     /* TIME_WAIT match: release it with tcp_tw_put()
                      * (presumably dropping the reference taken by the
                      * lookup -- confirm) and report "no socket".
                      */
1625                 tcp_tw_put((struct tcp_tw_bucket *)nsk);
1626                 return NULL;
1627         }
1628 
1629 #ifdef CONFIG_SYN_COOKIES
             /* Only a segment with ACK set and no RST/SYN is offered to the
              * syncookie check; its result replaces sk.
              */
1630         if (!th->rst && !th->syn && th->ack)
1631                 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1632 #endif
1633         return sk;
1634 }
1635 
1636 static int tcp_v4_checksum_init(struct sk_buff *skb)
1637 {
1638         if (skb->ip_summed == CHECKSUM_HW) {
1639                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1640                 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1641                                   skb->nh.iph->daddr, skb->csum))
1642                         return 0;
1643 
1644                 NETDEBUG(if (net_ratelimit())
1645                                 printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1646                 skb->ip_summed = CHECKSUM_NONE;
1647         }
1648         if (skb->len <= 76) {
1649                 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1650                                  skb->nh.iph->daddr,
1651                                  skb_checksum(skb, 0, skb->len, 0)))
1652                         return -1;
1653                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1654         } else {
1655                 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1656                                           skb->nh.iph->saddr,
1657                                           skb->nh.iph->daddr, 0);
1658         }
1659         return 0;
1660 }
1661 
1662 
1663 /* The socket must have its spinlock held when we get
1664  * here.
1665  *
1666  * We have a potential double-lock case here, so even when
1667  * doing backlog processing we use the BH locking scheme.
1668  * This is because we cannot sleep with the original spinlock
1669  * held.
      *
      * Processes one segment for sk.  Established sockets take the fast
      * path through tcp_rcv_established(); listening sockets are first
      * resolved through tcp_v4_hnd_req(); everything else goes to
      * tcp_rcv_state_process().  A non-zero result from any of those
      * handlers sends a reset and frees the skb.
      *
      * Every path returns 0.
1670  */
1671 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1672 {
1673         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1674                 TCP_CHECK_TIMER(sk);
1675                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1676                         goto reset;
1677                 TCP_CHECK_TIMER(sk);
1678                 return 0;
1679         }
1680 
             /* Slow path: the header must fit in the segment (doff is in
              * 32-bit words, hence << 2) and the checksum must verify.
              */
1681         if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1682                 goto csum_err;
1683 
1684         if (sk->sk_state == TCP_LISTEN) {
1685                 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1686                 if (!nsk)
1687                         goto discard;
1688 
                     /* A different socket was found for this segment:
                      * process it there instead of on the listener.
                      */
1689                 if (nsk != sk) {
1690                         if (tcp_child_process(sk, nsk, skb))
1691                                 goto reset;
1692                         return 0;
1693                 }
1694         }
1695 
1696         TCP_CHECK_TIMER(sk);
1697         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1698                 goto reset;
1699         TCP_CHECK_TIMER(sk);
1700         return 0;
1701 
1702 reset:
1703         tcp_v4_send_reset(skb);
             /* fall through: the skb is freed after the reset is sent */
1704 discard:
1705         kfree_skb(skb);
1706         /* Be careful here. If this function gets more complicated and
1707          * gcc suffers from register pressure on the x86, sk (in %ebx)
1708          * might be destroyed here. This current version compiles correctly,
1709          * but you have been warned.
1710          */
1711         return 0;
1712 
1713 csum_err:
1714         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1715         goto discard;
1716 }
1717 
1718 /*
1719  *      From tcp_input.c
1720  */
1721 
/*
 * Entry point for a received IPv4 TCP segment.
 *
 * Validates the TCP header, initialises the checksum state and the TCP
 * control block kept in the skb, looks up the owning socket, and then either
 * processes the segment directly, prequeues it, or adds it to the socket
 * backlog.  Segments that match a TIME_WAIT bucket or no socket at all are
 * handled at the labels at the end of the function.
 *
 * Returns the result of tcp_v4_do_rcv() when that is called, otherwise 0.
 */
1722 int tcp_v4_rcv(struct sk_buff *skb)
1723 {
1724         struct tcphdr *th;
1725         struct sock *sk;
1726         int ret;
1727 
1728         if (skb->pkt_type != PACKET_HOST)
1729                 goto discard_it;
1730 
1731         /* Count it even if it's bad */
1732         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1733 
1734         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1735                 goto discard_it;
1736 
1737         th = skb->h.th;
1738 
             /* doff counts 32-bit words: reject headers shorter than
              * struct tcphdr, then make sure the full header is pullable.
              */
1739         if (th->doff < sizeof(struct tcphdr) / 4)
1740                 goto bad_packet;
1741         if (!pskb_may_pull(skb, th->doff * 4))
1742                 goto discard_it;
1743 
1744         /* An explanation is required here, I think.
1745          * Packet length and doff are validated by header prediction,
1746          * provided case of th->doff==0 is eliminated.
1747          * So, we defer the checks. */
1748         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1749              tcp_v4_checksum_init(skb) < 0))
1750                 goto bad_packet;
1751 
             /* Reload th -- presumably because pskb_may_pull() may have
              * moved the header data; confirm.
              */
1752         th = skb->h.th;
1753         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1754         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1755                                     skb->len - th->doff * 4);
1756         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1757         TCP_SKB_CB(skb)->when    = 0;
1758         TCP_SKB_CB(skb)->flags   = skb->nh.iph->tos;
1759         TCP_SKB_CB(skb)->sacked  = 0;
1760 
1761         sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
1762                              skb->nh.iph->daddr, ntohs(th->dest),
1763                              tcp_v4_iif(skb));
1764 
1765         if (!sk)
1766                 goto no_tcp_socket;
1767 
     /* Also re-entered from do_time_wait with sk replaced by a listener. */
1768 process:
1769         if (sk->sk_state == TCP_TIME_WAIT)
1770                 goto do_time_wait;
1771 
1772         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1773                 goto discard_and_relse;
1774 
1775         if (sk_filter(sk, skb, 0))
1776                 goto discard_and_relse;
1777 
1778         skb->dev = NULL;
1779 
             /* With the socket BH-locked: if no user context owns it, offer
              * the segment to the prequeue and process it immediately when
              * tcp_prequeue() declines; otherwise park it on the backlog.
              */
1780         bh_lock_sock(sk);
1781         ret = 0;
1782         if (!sock_owned_by_user(sk)) {
1783                 if (!tcp_prequeue(sk, skb))
1784                         ret = tcp_v4_do_rcv(sk, skb);
1785         } else
1786                 sk_add_backlog(sk, skb);
1787         bh_unlock_sock(sk);
1788 
1789         sock_put(sk);
1790 
1791         return ret;
1792 
1793 no_tcp_socket:
1794         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1795                 goto discard_it;
1796 
             /* Note: bad_packet sits inside the if body, so a jump to it
              * only counts the error and never sends a reset.
              */
1797         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1798 bad_packet:
1799                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1800         } else {
1801                 tcp_v4_send_reset(skb);
1802         }
1803 
1804 discard_it:
1805         /* Discard frame. */
1806         kfree_skb(skb);
1807         return 0;
1808 
1809 discard_and_relse:
1810         sock_put(sk);
1811         goto discard_it;
1812 
     /* sk is really a struct tcp_tw_bucket here, hence the casts and the use
      * of tcp_tw_put() instead of sock_put().
      */
1813 do_time_wait:
1814         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1815                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1816                 goto discard_it;
1817         }
1818 
1819         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1820                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1821                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1822                 goto discard_it;
1823         }
1824         switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1825                                            skb, th, skb->len)) {
1826         case TCP_TW_SYN: {
                     /* If a listener exists for the destination, retire the
                      * TIME_WAIT bucket and process the segment on the
                      * listener instead.
                      */
1827                 struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
1828                                                           ntohs(th->dest),
1829                                                           tcp_v4_iif(skb));
1830                 if (sk2) {
1831                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1832                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1833                         sk = sk2;
1834                         goto process;
1835                 }
1836                 /* Fall through to ACK */
1837         }
1838         case TCP_TW_ACK:
1839                 tcp_v4_timewait_ack(sk, skb);
1840                 break;
1841         case TCP_TW_RST:
1842                 goto no_tcp_socket;
1843         case TCP_TW_SUCCESS:;
1844         }
1845         goto discard_it;
1846 }
1847 
1848 /* Re-insert sk into the protocol's hash tables by calling the
      * sk->sk_prot unhash() hook followed by its hash() hook.
      *
      * The two steps are not atomic.  Per the original note: with
      * per-bucket locks the operation is non-atomic anyway, so this
      * simple version is no worse.
      */
1851 static void __tcp_v4_rehash(struct sock *sk)
1852 {
1853         sk->sk_prot->unhash(sk);
1854         sk->sk_prot->hash(sk);
1855 }
1856 
1857 static int tcp_v4_reselect_saddr(struct sock *sk)
1858 {
1859         struct inet_sock *inet = inet_sk(sk);
1860         int err;
1861         struct rtable *rt;
1862         __u32 old_saddr = inet->saddr;
1863         __u32 new_saddr;
1864         __u32 daddr = inet->daddr;
1865 
1866         if (inet->opt && inet->opt->srr)
1867                 daddr = inet->opt->faddr;
1868 
1869         /* Query new route. */
1870         err = ip_route_connect(&rt, daddr, 0,
1871                                RT_TOS(inet->tos) | sk->sk_localroute,
1872                                sk->sk_bound_dev_if,
1873                                IPPROTO_TCP,
1874                                inet->sport, inet->dport, sk);
1875         if (err)
1876                 return err;
1877 
1878         __sk_dst_set(sk, &rt->u.dst);
1879         tcp_v4_setup_caps(sk, &rt->u.dst);
1880         tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1881 
1882         new_saddr = rt->rt_src;
1883 
1884         if (new_saddr == old_saddr)
1885                 return 0;
1886 
1887         if (sysctl_ip_dynaddr > 1) {
1888                 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1889                                  "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1890                        NIPQUAD(old_saddr),
1891                        NIPQUAD(new_saddr));
1892         }
1893 
1894         inet->saddr = new_saddr;
1895         inet->rcv_saddr = new_saddr;
1896 
1897         /* XXX The only one ugly spot where we need to
1898          * XXX really change the sockets identity after
1899          * XXX it has entered the hashes. -DaveM
1900          *
1901          * Besides that, it does not check for connection
1902          * uniqueness. Wait for troubles.
1903          */
1904         __tcp_v4_rehash(sk);
1905         return 0;
1906 }
1907 
1908 int tcp_v4_rebuild_header(struct sock *sk)
1909 {
1910         struct inet_sock *inet = inet_sk(sk);
1911         struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1912         u32 daddr;
1913         int err;
1914 
1915         /* Route is OK, nothing to do. */
1916         if (rt)
1917                 return 0;
1918 
1919         /* Reroute. */
1920         daddr = inet->daddr;
1921         if (inet->opt && inet->opt->srr)
1922                 daddr = inet->opt->faddr;
1923 
1924         {
1925                 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1926                                     .nl_u = { .ip4_u =
1927                                               { .daddr = daddr,
1928                                                 .saddr = inet->saddr,
1929                                                 .tos = RT_CONN_FLAGS(sk) } },
1930                                     .proto = IPPROTO_TCP,
1931                                     .uli_u = { .ports =
1932                                                { .sport = inet->sport,
1933                                                  .dport = inet->dport } } };
1934                                                 
1935                 err = ip_route_output_flow(&rt, &fl, sk, 0);
1936         }
1937         if (!err) {
1938                 __sk_dst_set(sk, &rt->u.dst);
1939                 tcp_v4_setup_caps(sk, &rt->u.dst);
1940                 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1941                 return 0;
1942         }
1943 
1944         /* Routing failed... */
1945         sk->sk_route_caps = 0;
1946 
1947         if (!sysctl_ip_dynaddr ||
1948             sk->sk_state != TCP_SYN_SENT ||
1949             (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1950             (err = tcp_v4_reselect_saddr(sk)) != 0)
1951                 sk->sk_err_soft = -err;
1952 
1953         return err;
1954 }
1955 
1956 static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1957 {
1958         struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1959         struct inet_sock *inet = inet_sk(sk);
1960 
1961         sin->sin_family         = AF_INET;
1962         sin->sin_addr.s_addr    = inet->daddr;
1963         sin->sin_port           = inet->dport;
1964 }
1965 
1966 /* VJ's idea. Save last timestamp seen from this destination
1967  * and hold it at least for normal timewait interval to use for duplicate
1968  * segment detection in subsequent connections, before they enter synchronized
1969  * state.
1970  */
1971 
1972 int tcp_v4_remember_stamp(struct sock *sk)
1973 {
1974         struct inet_sock *inet = inet_sk(sk);
1975         struct tcp_sock *tp = tcp_sk(sk);
1976         struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1977         struct inet_peer *peer = NULL;
1978         int release_it = 0;
1979 
1980         if (!rt || rt->rt_dst != inet->daddr) {
1981                 peer = inet_getpeer(inet->daddr, 1);
1982                 release_it = 1;
1983         } else {
1984                 if (!rt->peer)
1985                         rt_bind_peer(rt, 1);
1986                 peer = rt->peer;
1987         }
1988 
1989         if (peer) {
1990                 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1991                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1992                      peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1993                         peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1994                         peer->tcp_ts = tp->rx_opt.ts_recent;
1995                 }
1996                 if (release_it)
1997                         inet_putpeer(peer);
1998                 return 1;
1999         }
2000 
2001         return 0;
2002 }
2003 
2004 int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2005 {
2006         struct inet_peer *peer = NULL;
2007 
2008         peer = inet_getpeer(tw->tw_daddr, 1);
2009 
2010         if (peer) {
2011                 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
2012                     (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2013                      peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
2014                         peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
2015                         peer->tcp_ts = tw->tw_ts_recent;
2016                 }
2017                 inet_putpeer(peer);
2018                 return 1;
2019         }
2020 
2021         return 0;
2022 }
2023 
2024 struct tcp_func ipv4_specific = {
2025         .queue_xmit     =       ip_queue_xmit,
2026         .send_check     =       tcp_v4_send_check,
2027         .rebuild_header =       tcp_v4_rebuild_header,
2028         .conn_request   =       tcp_v4_conn_request,
2029         .syn_recv_sock  =       tcp_v4_syn_recv_sock,
2030         .remember_stamp =       tcp_v4_remember_stamp,
2031         .net_header_len =       sizeof(struct iphdr),
2032         .setsockopt     =       ip_setsockopt,
2033         .getsockopt     =       ip_getsockopt,
2034         .addr2sockaddr  =       v4_addr2sockaddr,
2035         .sockaddr_len   =       sizeof(struct sockaddr_in),
2036 };
2037 
2038 /* NOTE: A lot of things set to zero explicitly by call to
2039  *       sk_alloc() so need not be done here.
2040  */
2041 static int tcp_v4_init_sock(struct sock *sk)
2042 {
2043         struct tcp_sock *tp = tcp_sk(sk);
2044 
2045         skb_queue_head_init(&tp->out_of_order_queue);
2046         tcp_init_xmit_timers(sk);
2047         tcp_prequeue_init(tp);
2048 
2049         tp->rto  = TCP_TIMEOUT_INIT;
2050         tp->mdev = TCP_TIMEOUT_INIT;
2051 
2052         /* So many TCP implementations out there (incorrectly) count the
2053          * initial SYN frame in their delayed-ACK and congestion control
2054          * algorithms that we must have the following bandaid to talk
2055          * efficiently to them.  -DaveM
2056          */
2057         tp->snd_cwnd = 2;
2058 
2059         /* See draft-stevens-tcpca-spec-01 for discussion of the
2060          * initialization of these values.
2061          */
2062         tp->snd_ssthresh = 0x7fffffff;  /* Infinity */
2063         tp->snd_cwnd_clamp = ~0;
2064         tp->mss_cache_std = tp->mss_cache = 536;
2065 
2066         tp->reordering = sysctl_tcp_reordering;
2067 
2068         sk->sk_state = TCP_CLOSE;
2069 
2070         sk->sk_write_space = sk_stream_write_space;
2071         sk->sk_use_write_queue = 1;
2072 
2073         tp->af_specific = &ipv4_specific;
2074 
2075         sk->sk_sndbuf = sysctl_tcp_wmem[1];
2076         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2077 
2078         atomic_inc(&tcp_sockets_allocated);
2079 
2080         return 0;
2081 }
2082 
/*
 * Release the TCP-specific resources of sk: timers, the write,
 * out-of-order and prequeue queues, the bind bucket reference and the
 * cached sendmsg page.  Also decrements tcp_sockets_allocated, which
 * tcp_v4_init_sock() incremented.
 *
 * Always returns 0.
 */
2083 int tcp_v4_destroy_sock(struct sock *sk)
2084 {
2085         struct tcp_sock *tp = tcp_sk(sk);
2086 
2087         tcp_clear_xmit_timers(sk);
2088 
2089         /* Clean up the write buffer. */
2090         sk_stream_writequeue_purge(sk);
2091 
2092         /* Cleans up our, hopefully empty, out_of_order_queue. */
2093         __skb_queue_purge(&tp->out_of_order_queue);
2094 
2095         /* Clean prequeue, it must be empty really */
2096         __skb_queue_purge(&tp->ucopy.prequeue);
2097 
2098         /* Clean up a referenced TCP bind bucket. */
2099         if (tp->bind_hash)
2100                 tcp_put_port(sk);
2101 
2102         /*
2103          * If sendmsg cached page exists, toss it.
2104          */
2105         if (sk->sk_sndmsg_page) {
2106                 __free_page(sk->sk_sndmsg_page);
2107                 sk->sk_sndmsg_page = NULL;
2108         }
2109 
2110         atomic_dec(&tcp_sockets_allocated);
2111 
2112         return 0;
2113 }
2114 
2115 EXPORT_SYMBOL(tcp_v4_destroy_sock);
2116 
2117 #ifdef CONFIG_PROC_FS
2118 /* Proc filesystem TCP sock list dumping. */
2119 
2120 static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
2121 {
2122         return hlist_empty(head) ? NULL :
2123                 list_entry(head->first, struct tcp_tw_bucket, tw_node);
2124 }
2125 
2126 static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2127 {
2128         return tw->tw_node.next ?
2129                 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2130 }
2131 
/*
 * Advance the /proc iterator through the listening sockets and their
 * pending open requests.
 *
 * cur is NULL to start from bucket 0 of tcp_listening_hash, otherwise the
 * element returned by the previous call: a struct open_request when
 * st->state is TCP_SEQ_STATE_OPENREQ, a struct sock otherwise.  Only entries
 * whose family equals st->family are returned.
 *
 * Returns the next element or NULL after the last bucket.  When an
 * open_request is returned, st->state is TCP_SEQ_STATE_OPENREQ and the
 * syn_wait_lock of st->syn_wait_sk is still read-locked; it is released
 * here once that socket's request table is exhausted, or by tcp_seq_stop().
 */
2132 static void *listening_get_next(struct seq_file *seq, void *cur)
2133 {
2134         struct tcp_sock *tp;
2135         struct hlist_node *node;
2136         struct sock *sk = cur;
2137         struct tcp_iter_state* st = seq->private;
2138 
2139         if (!sk) {
2140                 st->bucket = 0;
2141                 sk = sk_head(&tcp_listening_hash[0]);
2142                 goto get_sk;
2143         }
2144 
2145         ++st->num;
2146 
2147         if (st->state == TCP_SEQ_STATE_OPENREQ) {
                     /* Continue walking the SYN table of st->syn_wait_sk;
                      * its syn_wait_lock is already held.
                      */
2148                 struct open_request *req = cur;
2149 
2150                 tp = tcp_sk(st->syn_wait_sk);
2151                 req = req->dl_next;
2152                 while (1) {
2153                         while (req) {
2154                                 if (req->class->family == st->family) {
2155                                         cur = req;
2156                                         goto out;
2157                                 }
2158                                 req = req->dl_next;
2159                         }
2160                         if (++st->sbucket >= TCP_SYNQ_HSIZE)
2161                                 break;
     /* Entered from start_req below with tp, st->sbucket and the lock set up. */
2162 get_req:
2163                         req = tp->listen_opt->syn_table[st->sbucket];
2164                 }
                     /* Request table exhausted: back to walking sockets. */
2165                 sk        = sk_next(st->syn_wait_sk);
2166                 st->state = TCP_SEQ_STATE_LISTENING;
2167                 read_unlock_bh(&tp->syn_wait_lock);
2168         } else {
                     /* cur was a listening socket: visit its open requests,
                      * if it has any, before moving to the next socket.
                      */
2169                 tp = tcp_sk(sk);
2170                 read_lock_bh(&tp->syn_wait_lock);
2171                 if (tp->listen_opt && tp->listen_opt->qlen)
2172                         goto start_req;
2173                 read_unlock_bh(&tp->syn_wait_lock);
2174                 sk = sk_next(sk);
2175         }
2176 get_sk:
2177         sk_for_each_from(sk, node) {
2178                 if (sk->sk_family == st->family) {
2179                         cur = sk;
2180                         goto out;
2181                 }
                     /* Socket of another family: it is not reported itself,
                      * but its open requests are still examined.
                      */
2182                 tp = tcp_sk(sk);
2183                 read_lock_bh(&tp->syn_wait_lock);
2184                 if (tp->listen_opt && tp->listen_opt->qlen) {
2185 start_req:
2186                         st->uid         = sock_i_uid(sk);
2187                         st->syn_wait_sk = sk;
2188                         st->state       = TCP_SEQ_STATE_OPENREQ;
2189                         st->sbucket     = 0;
2190                         goto get_req;
2191                 }
2192                 read_unlock_bh(&tp->syn_wait_lock);
2193         }
2194         if (++st->bucket < TCP_LHTABLE_SIZE) {
2195                 sk = sk_head(&tcp_listening_hash[st->bucket]);
2196                 goto get_sk;
2197         }
2198         cur = NULL;
2199 out:
2200         return cur;
2201 }
2202 
2203 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2204 {
2205         void *rc = listening_get_next(seq, NULL);
2206 
2207         while (rc && *pos) {
2208                 rc = listening_get_next(seq, rc);
2209                 --*pos;
2210         }
2211         return rc;
2212 }
2213 
/*
 * Find the first entry of st->family in the established hash table.
 *
 * Buckets are scanned from 0.  In each bucket the chain of full sockets is
 * searched first, then the TIME_WAIT chain stored tcp_ehash_size entries
 * further on; both are walked under the bucket's read lock.
 *
 * Returns the socket or TIME_WAIT bucket found, with the lock of
 * tcp_ehash[st->bucket] still read-held and st->state telling which kind it
 * is, or NULL (no lock held) when the table has no matching entry.
 */
2214 static void *established_get_first(struct seq_file *seq)
2215 {
2216         struct tcp_iter_state* st = seq->private;
2217         void *rc = NULL;
2218 
2219         for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
2220                 struct sock *sk;
2221                 struct hlist_node *node;
2222                 struct tcp_tw_bucket *tw;
2223 
2224                 /* We can reschedule _before_ having picked the target: */
2225                 cond_resched_softirq();
2226 
2227                 read_lock(&tcp_ehash[st->bucket].lock);
2228                 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
2229                         if (sk->sk_family != st->family) {
2230                                 continue;
2231                         }
2232                         rc = sk;
2233                         goto out;
2234                 }
                     /* No full socket here: try the bucket's TIME_WAIT chain. */
2235                 st->state = TCP_SEQ_STATE_TIME_WAIT;
2236                 tw_for_each(tw, node,
2237                             &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
2238                         if (tw->tw_family != st->family) {
2239                                 continue;
2240                         }
2241                         rc = tw;
2242                         goto out;
2243                 }
                     /* Nothing in this bucket: drop its lock and reset the
                      * state before looking at the next one.
                      */
2244                 read_unlock(&tcp_ehash[st->bucket].lock);
2245                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2246         }
2247 out:
2248         return rc;
2249 }
2250 
/*
 * Advance the /proc iterator within the established hash table.
 *
 * cur is the element returned by the previous call: a struct tcp_tw_bucket
 * when st->state is TCP_SEQ_STATE_TIME_WAIT, a struct sock otherwise.  The
 * read lock of tcp_ehash[st->bucket] is held on entry.
 *
 * Returns the next entry of st->family with its bucket's lock read-held, or
 * NULL once the last bucket has been exhausted (the lock has been dropped
 * by then).
 */
2251 static void *established_get_next(struct seq_file *seq, void *cur)
2252 {
2253         struct sock *sk = cur;
2254         struct tcp_tw_bucket *tw;
2255         struct hlist_node *node;
2256         struct tcp_iter_state* st = seq->private;
2257 
2258         ++st->num;
2259 
2260         if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2261                 tw = cur;
2262                 tw = tw_next(tw);
     /* Also reached from below with tw set to the head of the TIME_WAIT chain. */
2263 get_tw:
2264                 while (tw && tw->tw_family != st->family) {
2265                         tw = tw_next(tw);
2266                 }
2267                 if (tw) {
2268                         cur = tw;
2269                         goto out;
2270                 }
                     /* TIME_WAIT chain finished: this bucket is done. */
2271                 read_unlock(&tcp_ehash[st->bucket].lock);
2272                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2273 
2274                 /* We can reschedule between buckets: */
2275                 cond_resched_softirq();
2276 
2277                 if (++st->bucket < tcp_ehash_size) {
2278                         read_lock(&tcp_ehash[st->bucket].lock);
2279                         sk = sk_head(&tcp_ehash[st->bucket].chain);
2280                 } else {
2281                         cur = NULL;
2282                         goto out;
2283                 }
2284         } else
2285                 sk = sk_next(sk);
2286 
2287         sk_for_each_from(sk, node) {
2288                 if (sk->sk_family == st->family)
2289                         goto found;
2290         }
2291 
             /* No more full sockets in this bucket: continue with its
              * TIME_WAIT chain, under the same lock.
              */
2292         st->state = TCP_SEQ_STATE_TIME_WAIT;
2293         tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
2294         goto get_tw;
2295 found:
2296         cur = sk;
2297 out:
2298         return cur;
2299 }
2300 
2301 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2302 {
2303         void *rc = established_get_first(seq);
2304 
2305         while (rc && pos) {
2306                 rc = established_get_next(seq, rc);
2307                 --pos;
2308         }               
2309         return rc;
2310 }
2311 
2312 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2313 {
2314         void *rc;
2315         struct tcp_iter_state* st = seq->private;
2316 
2317         tcp_listen_lock();
2318         st->state = TCP_SEQ_STATE_LISTENING;
2319         rc        = listening_get_idx(seq, &pos);
2320 
2321         if (!rc) {
2322                 tcp_listen_unlock();
2323                 local_bh_disable();
2324                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2325                 rc        = established_get_idx(seq, pos);
2326         }
2327 
2328         return rc;
2329 }
2330 
2331 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2332 {
2333         struct tcp_iter_state* st = seq->private;
2334         st->state = TCP_SEQ_STATE_LISTENING;
2335         st->num = 0;
2336         return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2337 }
2338 
2339 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2340 {
2341         void *rc = NULL;
2342         struct tcp_iter_state* st;
2343 
2344         if (v == SEQ_START_TOKEN) {
2345                 rc = tcp_get_idx(seq, 0);
2346                 goto out;
2347         }
2348         st = seq->private;
2349 
2350         switch (st->state) {
2351         case TCP_SEQ_STATE_OPENREQ:
2352         case TCP_SEQ_STATE_LISTENING:
2353                 rc = listening_get_next(seq, v);
2354                 if (!rc) {
2355                         tcp_listen_unlock();
2356                         local_bh_disable();
2357                         st->state = TCP_SEQ_STATE_ESTABLISHED;
2358                         rc        = established_get_first(seq);
2359                 }
2360                 break;
2361         case TCP_SEQ_STATE_ESTABLISHED:
2362         case TCP_SEQ_STATE_TIME_WAIT:
2363                 rc = established_get_next(seq, v);
2364                 break;
2365         }
2366 out:
2367         ++*pos;
2368         return rc;
2369 }
2370 
/*
 * seq_file stop operation: release whatever the iterator still holds for
 * its current state.
 *
 *   OPENREQ                - the syn_wait_lock of st->syn_wait_sk (when v is
 *                            non-NULL), then everything listed for LISTENING;
 *   LISTENING              - tcp_listen_lock, unless v is SEQ_START_TOKEN;
 *   TIME_WAIT/ESTABLISHED  - the read lock of tcp_ehash[st->bucket] (when v
 *                            is non-NULL), then bottom halves are re-enabled.
 */
2371 static void tcp_seq_stop(struct seq_file *seq, void *v)
2372 {
2373         struct tcp_iter_state* st = seq->private;
2374 
2375         switch (st->state) {
2376         case TCP_SEQ_STATE_OPENREQ:
2377                 if (v) {
2378                         struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2379                         read_unlock_bh(&tp->syn_wait_lock);
2380                 }
                     /* fall through: the listen lock is dropped as well */
2381         case TCP_SEQ_STATE_LISTENING:
2382                 if (v != SEQ_START_TOKEN)
2383                         tcp_listen_unlock();
2384                 break;
2385         case TCP_SEQ_STATE_TIME_WAIT:
2386         case TCP_SEQ_STATE_ESTABLISHED:
2387                 if (v)
2388                         read_unlock(&tcp_ehash[st->bucket].lock);
2389                 local_bh_enable();
2390                 break;
2391         }
2392 }
2393 
2394 static int tcp_seq_open(struct inode *inode, struct file *file)
2395 {
2396         struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2397         struct seq_file *seq;
2398         struct tcp_iter_state *s;
2399         int rc;
2400 
2401         if (unlikely(afinfo == NULL))
2402                 return -EINVAL;
2403 
2404         s = kmalloc(sizeof(*s), GFP_KERNEL);
2405         if (!s)
2406                 return -ENOMEM;
2407         memset(s, 0, sizeof(*s));
2408         s->family               = afinfo->family;
2409         s->seq_ops.start        = tcp_seq_start;
2410         s->seq_ops.next         = tcp_seq_next;
2411         s->seq_ops.show         = afinfo->seq_show;
2412         s->seq_ops.stop         = tcp_seq_stop;
2413 
2414         rc = seq_open(file, &s->seq_ops);
2415         if (rc)
2416                 goto out_kfree;
2417         seq          = file->private_data;
2418         seq->private = s;
2419 out:
2420         return rc;
2421 out_kfree:
2422         kfree(s);
2423         goto out;
2424 }
2425 
2426 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2427 {
2428         int rc = 0;
2429         struct proc_dir_entry *p;
2430 
2431         if (!afinfo)
2432                 return -EINVAL;
2433         afinfo->seq_fops->owner         = afinfo->owner;
2434         afinfo->seq_fops->open          = tcp_seq_open;
2435         afinfo->seq_fops->read          = seq_read;
2436         afinfo->seq_fops->llseek        = seq_lseek;
2437         afinfo->seq_fops->release       = seq_release_private;
2438         
2439         p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2440         if (p)
2441                 p->data = afinfo;
2442         else
2443                 rc = -ENOMEM;
2444         return rc;
2445 }
2446 
2447 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2448 {
2449         if (!afinfo)
2450                 return;
2451         proc_net_remove(afinfo->name);
2452         memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 
2453 }
2454 
2455 static void get_openreq4(struct sock *sk, struct open_request *req,
2456                          char *tmpbuf, int i, int uid)
2457 {
2458         int ttd = req->expires - jiffies;
2459 
2460         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2461                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2462                 i,
2463                 req->af.v4_req.loc_addr,
2464                 ntohs(inet_sk(sk)->sport),
2465                 req->af.v4_req.rmt_addr,
2466                 ntohs(req->rmt_port),
2467                 TCP_SYN_RECV,
2468                 0, 0, /* could print option size, but that is af dependent. */
2469                 1,    /* timers active (only the expire timer) */
2470                 jiffies_to_clock_t(ttd),
2471                 req->retrans,
2472                 uid,
2473                 0,  /* non standard timer */
2474                 0, /* open_requests have no inode */
2475                 atomic_read(&sk->sk_refcnt),
2476                 req);
2477 }
2478 
2479 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2480 {
2481         int timer_active;
2482         unsigned long timer_expires;
2483         struct tcp_sock *tp = tcp_sk(sp);
2484         struct inet_sock *inet = inet_sk(sp);
2485         unsigned int dest = inet->daddr;
2486         unsigned int src = inet->rcv_saddr;
2487         __u16 destp = ntohs(inet->dport);
2488         __u16 srcp = ntohs(inet->sport);
2489 
2490         if (tp->pending == TCP_TIME_RETRANS) {
2491                 timer_active    = 1;
2492                 timer_expires   = tp->timeout;
2493         } else if (tp->pending == TCP_TIME_PROBE0) {
2494                 timer_active    = 4;
2495                 timer_expires   = tp->timeout;
2496         } else if (timer_pending(&sp->sk_timer)) {
2497                 timer_active    = 2;
2498                 timer_expires   = sp->sk_timer.expires;
2499         } else {
2500                 timer_active    = 0;
2501                 timer_expires = jiffies;
2502         }
2503 
2504         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2505                         "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2506                 i, src, srcp, dest, destp, sp->sk_state,
2507                 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2508                 timer_active,
2509                 jiffies_to_clock_t(timer_expires - jiffies),
2510                 tp->retransmits,
2511                 sock_i_uid(sp),
2512                 tp->probes_out,
2513                 sock_i_ino(sp),
2514                 atomic_read(&sp->sk_refcnt), sp,
2515                 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
2516                 tp->snd_cwnd,
2517                 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2518 }
2519 
2520 static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
2521 {
2522         unsigned int dest, src;
2523         __u16 destp, srcp;
2524         int ttd = tw->tw_ttd - jiffies;
2525 
2526         if (ttd < 0)
2527                 ttd = 0;
2528 
2529         dest  = tw->tw_daddr;
2530         src   = tw->tw_rcv_saddr;
2531         destp = ntohs(tw->tw_dport);
2532         srcp  = ntohs(tw->tw_sport);
2533 
2534         sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2535                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2536                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2537                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2538                 atomic_read(&tw->tw_refcnt), tw);
2539 }
2540 
2541 #define TMPSZ 150
2542 
2543 static int tcp4_seq_show(struct seq_file *seq, void *v)
2544 {
2545         struct tcp_iter_state* st;
2546         char tmpbuf[TMPSZ + 1];
2547 
2548         if (v == SEQ_START_TOKEN) {
2549                 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2550                            "  sl  local_address rem_address   st tx_queue "
2551                            "rx_queue tr tm->when retrnsmt   uid  timeout "
2552                            "inode");
2553                 goto out;
2554         }
2555         st = seq->private;
2556 
2557         switch (st->state) {
2558         case TCP_SEQ_STATE_LISTENING:
2559         case TCP_SEQ_STATE_ESTABLISHED:
2560                 get_tcp4_sock(v, tmpbuf, st->num);
2561                 break;
2562         case TCP_SEQ_STATE_OPENREQ:
2563                 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2564                 break;
2565         case TCP_SEQ_STATE_TIME_WAIT:
2566                 get_timewait4_sock(v, tmpbuf, st->num);
2567                 break;
2568         }
2569         seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2570 out:
2571         return 0;
2572 }
2573 
2574 static struct file_operations tcp4_seq_fops;
2575 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2576         .owner          = THIS_MODULE,
2577         .name           = "tcp",
2578         .family         = AF_INET,
2579         .seq_show       = tcp4_seq_show,
2580         .seq_fops       = &tcp4_seq_fops,
2581 };
2582 
2583 int __init tcp4_proc_init(void)
2584 {
2585         return tcp_proc_register(&tcp4_seq_afinfo);
2586 }
2587 
2588 void tcp4_proc_exit(void)
2589 {
2590         tcp_proc_unregister(&tcp4_seq_afinfo);
2591 }
2592 #endif /* CONFIG_PROC_FS */
2593 
2594 struct proto tcp_prot = {
2595         .name                   = "TCP",
2596         .owner                  = THIS_MODULE,
2597         .close                  = tcp_close,
2598         .connect                = tcp_v4_connect,
2599         .disconnect             = tcp_disconnect,
2600         .accept                 = tcp_accept,
2601         .ioctl                  = tcp_ioctl,
2602         .init                   = tcp_v4_init_sock,
2603         .destroy                = tcp_v4_destroy_sock,
2604         .shutdown               = tcp_shutdown,
2605         .setsockopt             = tcp_setsockopt,
2606         .getsockopt             = tcp_getsockopt,
2607         .sendmsg                = tcp_sendmsg,
2608         .recvmsg                = tcp_recvmsg,
2609         .backlog_rcv            = tcp_v4_do_rcv,
2610         .hash                   = tcp_v4_hash,
2611         .unhash                 = tcp_unhash,
2612         .get_port               = tcp_v4_get_port,
2613         .enter_memory_pressure  = tcp_enter_memory_pressure,
2614         .sockets_allocated      = &tcp_sockets_allocated,
2615         .memory_allocated       = &tcp_memory_allocated,
2616         .memory_pressure        = &tcp_memory_pressure,
2617         .sysctl_mem             = sysctl_tcp_mem,
2618         .sysctl_wmem            = sysctl_tcp_wmem,
2619         .sysctl_rmem            = sysctl_tcp_rmem,
2620         .max_header             = MAX_TCP_HEADER,
2621         .slab_obj_size          = sizeof(struct tcp_sock),
2622 };
2623 
2624 
2625 
2626 void __init tcp_v4_init(struct net_proto_family *ops)
2627 {
2628         int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
2629         if (err < 0)
2630                 panic("Failed to create the TCP control socket.\n");
2631         tcp_socket->sk->sk_allocation   = GFP_ATOMIC;
2632         inet_sk(tcp_socket->sk)->uc_ttl = -1;
2633 
2634         /* Unhash it so that IP input processing does not even
2635          * see it, we do not wish this socket to see incoming
2636          * packets.
2637          */
2638         tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
2639 }
2640 
/* Symbols exported from this file for use by other kernel code. */

/* operation tables and shared hash state */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_hashinfo);

/* bind-hash, port and listen-lock helpers */
EXPORT_SYMBOL(tcp_bind_hash);
EXPORT_SYMBOL(tcp_bucket_create);
EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_port_rover);
EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);

/* IPv4-specific TCP entry points */
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* /proc registration helpers, only built with procfs support */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif

/* sysctl tunables */
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
2666 
2667 
  This page was automatically generated by the LXR engine.