Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
  3  *
  4  * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
  5  *
  6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  7  *              Julian Anastasov <ja@ssi.bg>
  8  *
  9  *              This program is free software; you can redistribute it and/or
 10  *              modify it under the terms of the GNU General Public License
 11  *              as published by the Free Software Foundation; either version
 12  *              2 of the License, or (at your option) any later version.
 13  *
 14  * Changes:
 15  *
 16  */
 17 
 18 #include <linux/in.h>
 19 #include <linux/ip.h>
 20 #include <linux/kernel.h>
 21 #include <linux/netfilter.h>
 22 #include <linux/netfilter_ipv4.h>
 23 #include <linux/udp.h>
 24 
 25 #include <net/ip_vs.h>
 26 #include <net/ip.h>
 27 
 28 static struct ip_vs_conn *
 29 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 30                 const struct iphdr *iph, unsigned int proto_off, int inverse)
 31 {
 32         struct ip_vs_conn *cp;
 33         __be16 _ports[2], *pptr;
 34 
 35         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 36         if (pptr == NULL)
 37                 return NULL;
 38 
 39         if (likely(!inverse)) {
 40                 cp = ip_vs_conn_in_get(iph->protocol,
 41                                        iph->saddr, pptr[0],
 42                                        iph->daddr, pptr[1]);
 43         } else {
 44                 cp = ip_vs_conn_in_get(iph->protocol,
 45                                        iph->daddr, pptr[1],
 46                                        iph->saddr, pptr[0]);
 47         }
 48 
 49         return cp;
 50 }
 51 
 52 
 53 static struct ip_vs_conn *
 54 udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 55                  const struct iphdr *iph, unsigned int proto_off, int inverse)
 56 {
 57         struct ip_vs_conn *cp;
 58         __be16 _ports[2], *pptr;
 59 
 60         pptr = skb_header_pointer(skb, ip_hdrlen(skb),
 61                                   sizeof(_ports), _ports);
 62         if (pptr == NULL)
 63                 return NULL;
 64 
 65         if (likely(!inverse)) {
 66                 cp = ip_vs_conn_out_get(iph->protocol,
 67                                         iph->saddr, pptr[0],
 68                                         iph->daddr, pptr[1]);
 69         } else {
 70                 cp = ip_vs_conn_out_get(iph->protocol,
 71                                         iph->daddr, pptr[1],
 72                                         iph->saddr, pptr[0]);
 73         }
 74 
 75         return cp;
 76 }
 77 
 78 
 79 static int
 80 udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 81                   int *verdict, struct ip_vs_conn **cpp)
 82 {
 83         struct ip_vs_service *svc;
 84         struct udphdr _udph, *uh;
 85 
 86         uh = skb_header_pointer(skb, ip_hdrlen(skb),
 87                                 sizeof(_udph), &_udph);
 88         if (uh == NULL) {
 89                 *verdict = NF_DROP;
 90                 return 0;
 91         }
 92 
 93         if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
 94                                      ip_hdr(skb)->daddr, uh->dest))) {
 95                 if (ip_vs_todrop()) {
 96                         /*
 97                          * It seems that we are very loaded.
 98                          * We have to drop this packet :(
 99                          */
100                         ip_vs_service_put(svc);
101                         *verdict = NF_DROP;
102                         return 0;
103                 }
104 
105                 /*
106                  * Let the virtual server select a real server for the
107                  * incoming connection, and create a connection entry.
108                  */
109                 *cpp = ip_vs_schedule(svc, skb);
110                 if (!*cpp) {
111                         *verdict = ip_vs_leave(svc, skb, pp);
112                         return 0;
113                 }
114                 ip_vs_service_put(svc);
115         }
116         return 1;
117 }
118 
119 
120 static inline void
121 udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
122                      __be16 oldport, __be16 newport)
123 {
124         uhdr->check =
125                 csum_fold(ip_vs_check_diff4(oldip, newip,
126                                  ip_vs_check_diff2(oldport, newport,
127                                         ~csum_unfold(uhdr->check))));
128         if (!uhdr->check)
129                 uhdr->check = CSUM_MANGLED_0;
130 }
131 
132 static int
133 udp_snat_handler(struct sk_buff *skb,
134                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
135 {
136         struct udphdr *udph;
137         const unsigned int udphoff = ip_hdrlen(skb);
138 
139         /* csum_check requires unshared skb */
140         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
141                 return 0;
142 
143         if (unlikely(cp->app != NULL)) {
144                 /* Some checks before mangling */
145                 if (pp->csum_check && !pp->csum_check(skb, pp))
146                         return 0;
147 
148                 /*
149                  *      Call application helper if needed
150                  */
151                 if (!ip_vs_app_pkt_out(cp, skb))
152                         return 0;
153         }
154 
155         udph = (void *)ip_hdr(skb) + udphoff;
156         udph->source = cp->vport;
157 
158         /*
159          *      Adjust UDP checksums
160          */
161         if (!cp->app && (udph->check != 0)) {
162                 /* Only port and addr are changed, do fast csum update */
163                 udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
164                                      cp->dport, cp->vport);
165                 if (skb->ip_summed == CHECKSUM_COMPLETE)
166                         skb->ip_summed = CHECKSUM_NONE;
167         } else {
168                 /* full checksum calculation */
169                 udph->check = 0;
170                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
171                 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
172                                                 skb->len - udphoff,
173                                                 cp->protocol, skb->csum);
174                 if (udph->check == 0)
175                         udph->check = CSUM_MANGLED_0;
176                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
177                           pp->name, udph->check,
178                           (char*)&(udph->check) - (char*)udph);
179         }
180         return 1;
181 }
182 
183 
184 static int
185 udp_dnat_handler(struct sk_buff *skb,
186                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
187 {
188         struct udphdr *udph;
189         unsigned int udphoff = ip_hdrlen(skb);
190 
191         /* csum_check requires unshared skb */
192         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
193                 return 0;
194 
195         if (unlikely(cp->app != NULL)) {
196                 /* Some checks before mangling */
197                 if (pp->csum_check && !pp->csum_check(skb, pp))
198                         return 0;
199 
200                 /*
201                  *      Attempt ip_vs_app call.
202                  *      It will fix ip_vs_conn
203                  */
204                 if (!ip_vs_app_pkt_in(cp, skb))
205                         return 0;
206         }
207 
208         udph = (void *)ip_hdr(skb) + udphoff;
209         udph->dest = cp->dport;
210 
211         /*
212          *      Adjust UDP checksums
213          */
214         if (!cp->app && (udph->check != 0)) {
215                 /* Only port and addr are changed, do fast csum update */
216                 udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
217                                      cp->vport, cp->dport);
218                 if (skb->ip_summed == CHECKSUM_COMPLETE)
219                         skb->ip_summed = CHECKSUM_NONE;
220         } else {
221                 /* full checksum calculation */
222                 udph->check = 0;
223                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
224                 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
225                                                 skb->len - udphoff,
226                                                 cp->protocol, skb->csum);
227                 if (udph->check == 0)
228                         udph->check = CSUM_MANGLED_0;
229                 skb->ip_summed = CHECKSUM_UNNECESSARY;
230         }
231         return 1;
232 }
233 
234 
235 static int
236 udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
237 {
238         struct udphdr _udph, *uh;
239         const unsigned int udphoff = ip_hdrlen(skb);
240 
241         uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
242         if (uh == NULL)
243                 return 0;
244 
245         if (uh->check != 0) {
246                 switch (skb->ip_summed) {
247                 case CHECKSUM_NONE:
248                         skb->csum = skb_checksum(skb, udphoff,
249                                                  skb->len - udphoff, 0);
250                 case CHECKSUM_COMPLETE:
251                         if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
252                                               ip_hdr(skb)->daddr,
253                                               skb->len - udphoff,
254                                               ip_hdr(skb)->protocol,
255                                               skb->csum)) {
256                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
257                                                  "Failed checksum for");
258                                 return 0;
259                         }
260                         break;
261                 default:
262                         /* No need to checksum. */
263                         break;
264                 }
265         }
266         return 1;
267 }
268 
269 
270 /*
271  *      Note: the caller guarantees that only one of register_app,
272  *      unregister_app or app_conn_bind is called each time.
273  */
274 
275 #define UDP_APP_TAB_BITS        4
276 #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
277 #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
278 
279 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
280 static DEFINE_SPINLOCK(udp_app_lock);
281 
282 static inline __u16 udp_app_hashkey(__be16 port)
283 {
284         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
285                 & UDP_APP_TAB_MASK;
286 }
287 
288 
289 static int udp_register_app(struct ip_vs_app *inc)
290 {
291         struct ip_vs_app *i;
292         __u16 hash;
293         __be16 port = inc->port;
294         int ret = 0;
295 
296         hash = udp_app_hashkey(port);
297 
298 
299         spin_lock_bh(&udp_app_lock);
300         list_for_each_entry(i, &udp_apps[hash], p_list) {
301                 if (i->port == port) {
302                         ret = -EEXIST;
303                         goto out;
304                 }
305         }
306         list_add(&inc->p_list, &udp_apps[hash]);
307         atomic_inc(&ip_vs_protocol_udp.appcnt);
308 
309   out:
310         spin_unlock_bh(&udp_app_lock);
311         return ret;
312 }
313 
314 
315 static void
316 udp_unregister_app(struct ip_vs_app *inc)
317 {
318         spin_lock_bh(&udp_app_lock);
319         atomic_dec(&ip_vs_protocol_udp.appcnt);
320         list_del(&inc->p_list);
321         spin_unlock_bh(&udp_app_lock);
322 }
323 
324 
325 static int udp_app_conn_bind(struct ip_vs_conn *cp)
326 {
327         int hash;
328         struct ip_vs_app *inc;
329         int result = 0;
330 
331         /* Default binding: bind app only for NAT */
332         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
333                 return 0;
334 
335         /* Lookup application incarnations and bind the right one */
336         hash = udp_app_hashkey(cp->vport);
337 
338         spin_lock(&udp_app_lock);
339         list_for_each_entry(inc, &udp_apps[hash], p_list) {
340                 if (inc->port == cp->vport) {
341                         if (unlikely(!ip_vs_app_inc_get(inc)))
342                                 break;
343                         spin_unlock(&udp_app_lock);
344 
345                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
346                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
347                                   __FUNCTION__,
348                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
349                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
350                                   inc->name, ntohs(inc->port));
351                         cp->app = inc;
352                         if (inc->init_conn)
353                                 result = inc->init_conn(inc, cp);
354                         goto out;
355                 }
356         }
357         spin_unlock(&udp_app_lock);
358 
359   out:
360         return result;
361 }
362 
363 
364 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
365         [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
366         [IP_VS_UDP_S_LAST]              =       2*HZ,
367 };
368 
369 static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
370         [IP_VS_UDP_S_NORMAL]            =       "UDP",
371         [IP_VS_UDP_S_LAST]              =       "BUG!",
372 };
373 
374 
375 static int
376 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
377 {
378         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
379                                        udp_state_name_table, sname, to);
380 }
381 
382 static const char * udp_state_name(int state)
383 {
384         if (state >= IP_VS_UDP_S_LAST)
385                 return "ERR!";
386         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
387 }
388 
389 static int
390 udp_state_transition(struct ip_vs_conn *cp, int direction,
391                      const struct sk_buff *skb,
392                      struct ip_vs_protocol *pp)
393 {
394         cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
395         return 1;
396 }
397 
398 static void udp_init(struct ip_vs_protocol *pp)
399 {
400         IP_VS_INIT_HASH_TABLE(udp_apps);
401         pp->timeout_table = udp_timeouts;
402 }
403 
/*
 *      Protocol exit hook: intentionally empty, udp_init() leaves
 *      nothing behind that needs to be torn down here.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
        /* nothing to do */
}
407 
408 
409 struct ip_vs_protocol ip_vs_protocol_udp = {
410         .name =                 "UDP",
411         .protocol =             IPPROTO_UDP,
412         .dont_defrag =          0,
413         .init =                 udp_init,
414         .exit =                 udp_exit,
415         .conn_schedule =        udp_conn_schedule,
416         .conn_in_get =          udp_conn_in_get,
417         .conn_out_get =         udp_conn_out_get,
418         .snat_handler =         udp_snat_handler,
419         .dnat_handler =         udp_dnat_handler,
420         .csum_check =           udp_csum_check,
421         .state_transition =     udp_state_transition,
422         .state_name =           udp_state_name,
423         .register_app =         udp_register_app,
424         .unregister_app =       udp_unregister_app,
425         .app_conn_bind =        udp_app_conn_bind,
426         .debug_packet =         ip_vs_tcpudp_debug_packet,
427         .timeout_change =       NULL,
428         .set_state_timeout =    udp_set_state_timeout,
429 };
430 
  This page was automatically generated by the LXR engine.