Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * Linux Socket Filter - Kernel level socket filtering
  3  *
  4  * Author:
  5  *     Jay Schulist <jschlst@samba.org>
  6  *
  7  * Based on the design of:
  8  *     - The Berkeley Packet Filter
  9  *
 10  * This program is free software; you can redistribute it and/or
 11  * modify it under the terms of the GNU General Public License
 12  * as published by the Free Software Foundation; either version
 13  * 2 of the License, or (at your option) any later version.
 14  *
 15  * Andi Kleen - Fix a few bad bugs and races.
 16  * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 17  */
 18 
 19 #include <linux/module.h>
 20 #include <linux/types.h>
 21 #include <linux/mm.h>
 22 #include <linux/fcntl.h>
 23 #include <linux/socket.h>
 24 #include <linux/in.h>
 25 #include <linux/inet.h>
 26 #include <linux/netdevice.h>
 27 #include <linux/if_packet.h>
 28 #include <net/ip.h>
 29 #include <net/protocol.h>
 30 #include <net/netlink.h>
 31 #include <linux/skbuff.h>
 32 #include <net/sock.h>
 33 #include <linux/errno.h>
 34 #include <linux/timer.h>
 35 #include <asm/system.h>
 36 #include <asm/uaccess.h>
 37 #include <asm/unaligned.h>
 38 #include <linux/filter.h>
 39 
 40 /* No hurry in this branch */
 41 static void *__load_pointer(struct sk_buff *skb, int k)
 42 {
 43         u8 *ptr = NULL;
 44 
 45         if (k >= SKF_NET_OFF)
 46                 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
 47         else if (k >= SKF_LL_OFF)
 48                 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 49 
 50         if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
 51                 return ptr;
 52         return NULL;
 53 }
 54 
 55 static inline void *load_pointer(struct sk_buff *skb, int k,
 56                                  unsigned int size, void *buffer)
 57 {
 58         if (k >= 0)
 59                 return skb_header_pointer(skb, k, size, buffer);
 60         else {
 61                 if (k >= SKF_AD_OFF)
 62                         return NULL;
 63                 return __load_pointer(skb, k);
 64         }
 65 }
 66 
 67 /**
 68  *      sk_filter - run a packet through a socket filter
 69  *      @sk: sock associated with &sk_buff
 70  *      @skb: buffer to filter
 71  *
 72  * Run the filter code and then cut skb->data to correct size returned by
 73  * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 74  * than pkt_len we keep whole skb->data. This is the socket level
 75  * wrapper to sk_run_filter. It returns 0 if the packet should
 76  * be accepted or -EPERM if the packet should be tossed.
 77  *
 78  */
 79 int sk_filter(struct sock *sk, struct sk_buff *skb)
 80 {
 81         int err;
 82         struct sk_filter *filter;
 83 
 84         err = security_sock_rcv_skb(sk, skb);
 85         if (err)
 86                 return err;
 87 
 88         rcu_read_lock_bh();
 89         filter = rcu_dereference(sk->sk_filter);
 90         if (filter) {
 91                 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
 92                                 filter->len);
 93                 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 94         }
 95         rcu_read_unlock_bh();
 96 
 97         return err;
 98 }
 99 EXPORT_SYMBOL(sk_filter);
100 
101 /**
102  *      sk_run_filter - run a filter on a socket
103  *      @skb: buffer to run the filter on
104  *      @filter: filter to apply
105  *      @flen: length of filter
106  *
107  * Decode and apply filter instructions to the skb->data.
108  * Return length to keep, 0 for none. skb is the data we are
109  * filtering, filter is the array of filter instructions, and
110  * len is the number of filter blocks in the array.
111  */
112 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
113 {
114         struct sock_filter *fentry;     /* We walk down these */
115         void *ptr;
116         u32 A = 0;                      /* Accumulator */
117         u32 X = 0;                      /* Index Register */
118         u32 mem[BPF_MEMWORDS];          /* Scratch Memory Store */
119         u32 tmp;
120         int k;
121         int pc;
122 
123         /*
124          * Process array of filter instructions.
125          */
126         for (pc = 0; pc < flen; pc++) {
127                 fentry = &filter[pc];
128 
129                 switch (fentry->code) {
130                 case BPF_ALU|BPF_ADD|BPF_X:
131                         A += X;
132                         continue;
133                 case BPF_ALU|BPF_ADD|BPF_K:
134                         A += fentry->k;
135                         continue;
136                 case BPF_ALU|BPF_SUB|BPF_X:
137                         A -= X;
138                         continue;
139                 case BPF_ALU|BPF_SUB|BPF_K:
140                         A -= fentry->k;
141                         continue;
142                 case BPF_ALU|BPF_MUL|BPF_X:
143                         A *= X;
144                         continue;
145                 case BPF_ALU|BPF_MUL|BPF_K:
146                         A *= fentry->k;
147                         continue;
148                 case BPF_ALU|BPF_DIV|BPF_X:
149                         if (X == 0)
150                                 return 0;
151                         A /= X;
152                         continue;
153                 case BPF_ALU|BPF_DIV|BPF_K:
154                         A /= fentry->k;
155                         continue;
156                 case BPF_ALU|BPF_AND|BPF_X:
157                         A &= X;
158                         continue;
159                 case BPF_ALU|BPF_AND|BPF_K:
160                         A &= fentry->k;
161                         continue;
162                 case BPF_ALU|BPF_OR|BPF_X:
163                         A |= X;
164                         continue;
165                 case BPF_ALU|BPF_OR|BPF_K:
166                         A |= fentry->k;
167                         continue;
168                 case BPF_ALU|BPF_LSH|BPF_X:
169                         A <<= X;
170                         continue;
171                 case BPF_ALU|BPF_LSH|BPF_K:
172                         A <<= fentry->k;
173                         continue;
174                 case BPF_ALU|BPF_RSH|BPF_X:
175                         A >>= X;
176                         continue;
177                 case BPF_ALU|BPF_RSH|BPF_K:
178                         A >>= fentry->k;
179                         continue;
180                 case BPF_ALU|BPF_NEG:
181                         A = -A;
182                         continue;
183                 case BPF_JMP|BPF_JA:
184                         pc += fentry->k;
185                         continue;
186                 case BPF_JMP|BPF_JGT|BPF_K:
187                         pc += (A > fentry->k) ? fentry->jt : fentry->jf;
188                         continue;
189                 case BPF_JMP|BPF_JGE|BPF_K:
190                         pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
191                         continue;
192                 case BPF_JMP|BPF_JEQ|BPF_K:
193                         pc += (A == fentry->k) ? fentry->jt : fentry->jf;
194                         continue;
195                 case BPF_JMP|BPF_JSET|BPF_K:
196                         pc += (A & fentry->k) ? fentry->jt : fentry->jf;
197                         continue;
198                 case BPF_JMP|BPF_JGT|BPF_X:
199                         pc += (A > X) ? fentry->jt : fentry->jf;
200                         continue;
201                 case BPF_JMP|BPF_JGE|BPF_X:
202                         pc += (A >= X) ? fentry->jt : fentry->jf;
203                         continue;
204                 case BPF_JMP|BPF_JEQ|BPF_X:
205                         pc += (A == X) ? fentry->jt : fentry->jf;
206                         continue;
207                 case BPF_JMP|BPF_JSET|BPF_X:
208                         pc += (A & X) ? fentry->jt : fentry->jf;
209                         continue;
210                 case BPF_LD|BPF_W|BPF_ABS:
211                         k = fentry->k;
212 load_w:
213                         ptr = load_pointer(skb, k, 4, &tmp);
214                         if (ptr != NULL) {
215                                 A = get_unaligned_be32(ptr);
216                                 continue;
217                         }
218                         break;
219                 case BPF_LD|BPF_H|BPF_ABS:
220                         k = fentry->k;
221 load_h:
222                         ptr = load_pointer(skb, k, 2, &tmp);
223                         if (ptr != NULL) {
224                                 A = get_unaligned_be16(ptr);
225                                 continue;
226                         }
227                         break;
228                 case BPF_LD|BPF_B|BPF_ABS:
229                         k = fentry->k;
230 load_b:
231                         ptr = load_pointer(skb, k, 1, &tmp);
232                         if (ptr != NULL) {
233                                 A = *(u8 *)ptr;
234                                 continue;
235                         }
236                         break;
237                 case BPF_LD|BPF_W|BPF_LEN:
238                         A = skb->len;
239                         continue;
240                 case BPF_LDX|BPF_W|BPF_LEN:
241                         X = skb->len;
242                         continue;
243                 case BPF_LD|BPF_W|BPF_IND:
244                         k = X + fentry->k;
245                         goto load_w;
246                 case BPF_LD|BPF_H|BPF_IND:
247                         k = X + fentry->k;
248                         goto load_h;
249                 case BPF_LD|BPF_B|BPF_IND:
250                         k = X + fentry->k;
251                         goto load_b;
252                 case BPF_LDX|BPF_B|BPF_MSH:
253                         ptr = load_pointer(skb, fentry->k, 1, &tmp);
254                         if (ptr != NULL) {
255                                 X = (*(u8 *)ptr & 0xf) << 2;
256                                 continue;
257                         }
258                         return 0;
259                 case BPF_LD|BPF_IMM:
260                         A = fentry->k;
261                         continue;
262                 case BPF_LDX|BPF_IMM:
263                         X = fentry->k;
264                         continue;
265                 case BPF_LD|BPF_MEM:
266                         A = mem[fentry->k];
267                         continue;
268                 case BPF_LDX|BPF_MEM:
269                         X = mem[fentry->k];
270                         continue;
271                 case BPF_MISC|BPF_TAX:
272                         X = A;
273                         continue;
274                 case BPF_MISC|BPF_TXA:
275                         A = X;
276                         continue;
277                 case BPF_RET|BPF_K:
278                         return fentry->k;
279                 case BPF_RET|BPF_A:
280                         return A;
281                 case BPF_ST:
282                         mem[fentry->k] = A;
283                         continue;
284                 case BPF_STX:
285                         mem[fentry->k] = X;
286                         continue;
287                 default:
288                         WARN_ON(1);
289                         return 0;
290                 }
291 
292                 /*
293                  * Handle ancillary data, which are impossible
294                  * (or very difficult) to get parsing packet contents.
295                  */
296                 switch (k-SKF_AD_OFF) {
297                 case SKF_AD_PROTOCOL:
298                         A = ntohs(skb->protocol);
299                         continue;
300                 case SKF_AD_PKTTYPE:
301                         A = skb->pkt_type;
302                         continue;
303                 case SKF_AD_IFINDEX:
304                         A = skb->dev->ifindex;
305                         continue;
306                 case SKF_AD_NLATTR: {
307                         struct nlattr *nla;
308 
309                         if (skb_is_nonlinear(skb))
310                                 return 0;
311                         if (A > skb->len - sizeof(struct nlattr))
312                                 return 0;
313 
314                         nla = nla_find((struct nlattr *)&skb->data[A],
315                                        skb->len - A, X);
316                         if (nla)
317                                 A = (void *)nla - (void *)skb->data;
318                         else
319                                 A = 0;
320                         continue;
321                 }
322                 case SKF_AD_NLATTR_NEST: {
323                         struct nlattr *nla;
324 
325                         if (skb_is_nonlinear(skb))
326                                 return 0;
327                         if (A > skb->len - sizeof(struct nlattr))
328                                 return 0;
329 
330                         nla = (struct nlattr *)&skb->data[A];
331                         if (nla->nla_len > A - skb->len)
332                                 return 0;
333 
334                         nla = nla_find_nested(nla, X);
335                         if (nla)
336                                 A = (void *)nla - (void *)skb->data;
337                         else
338                                 A = 0;
339                         continue;
340                 }
341                 default:
342                         return 0;
343                 }
344         }
345 
346         return 0;
347 }
348 EXPORT_SYMBOL(sk_run_filter);
349 
350 /**
351  *      sk_chk_filter - verify socket filter code
352  *      @filter: filter to verify
353  *      @flen: length of filter
354  *
355  * Check the user's filter code. If we let some ugly
356  * filter code slip through kaboom! The filter must contain
357  * no references or jumps that are out of range, no illegal
358  * instructions, and must end with a RET instruction.
359  *
360  * All jumps are forward as they are not signed.
361  *
362  * Returns 0 if the rule set is legal or -EINVAL if not.
363  */
364 int sk_chk_filter(struct sock_filter *filter, int flen)
365 {
366         struct sock_filter *ftest;
367         int pc;
368 
369         if (flen == 0 || flen > BPF_MAXINSNS)
370                 return -EINVAL;
371 
372         /* check the filter code now */
373         for (pc = 0; pc < flen; pc++) {
374                 ftest = &filter[pc];
375 
376                 /* Only allow valid instructions */
377                 switch (ftest->code) {
378                 case BPF_ALU|BPF_ADD|BPF_K:
379                 case BPF_ALU|BPF_ADD|BPF_X:
380                 case BPF_ALU|BPF_SUB|BPF_K:
381                 case BPF_ALU|BPF_SUB|BPF_X:
382                 case BPF_ALU|BPF_MUL|BPF_K:
383                 case BPF_ALU|BPF_MUL|BPF_X:
384                 case BPF_ALU|BPF_DIV|BPF_X:
385                 case BPF_ALU|BPF_AND|BPF_K:
386                 case BPF_ALU|BPF_AND|BPF_X:
387                 case BPF_ALU|BPF_OR|BPF_K:
388                 case BPF_ALU|BPF_OR|BPF_X:
389                 case BPF_ALU|BPF_LSH|BPF_K:
390                 case BPF_ALU|BPF_LSH|BPF_X:
391                 case BPF_ALU|BPF_RSH|BPF_K:
392                 case BPF_ALU|BPF_RSH|BPF_X:
393                 case BPF_ALU|BPF_NEG:
394                 case BPF_LD|BPF_W|BPF_ABS:
395                 case BPF_LD|BPF_H|BPF_ABS:
396                 case BPF_LD|BPF_B|BPF_ABS:
397                 case BPF_LD|BPF_W|BPF_LEN:
398                 case BPF_LD|BPF_W|BPF_IND:
399                 case BPF_LD|BPF_H|BPF_IND:
400                 case BPF_LD|BPF_B|BPF_IND:
401                 case BPF_LD|BPF_IMM:
402                 case BPF_LDX|BPF_W|BPF_LEN:
403                 case BPF_LDX|BPF_B|BPF_MSH:
404                 case BPF_LDX|BPF_IMM:
405                 case BPF_MISC|BPF_TAX:
406                 case BPF_MISC|BPF_TXA:
407                 case BPF_RET|BPF_K:
408                 case BPF_RET|BPF_A:
409                         break;
410 
411                 /* Some instructions need special checks */
412 
413                 case BPF_ALU|BPF_DIV|BPF_K:
414                         /* check for division by zero */
415                         if (ftest->k == 0)
416                                 return -EINVAL;
417                         break;
418 
419                 case BPF_LD|BPF_MEM:
420                 case BPF_LDX|BPF_MEM:
421                 case BPF_ST:
422                 case BPF_STX:
423                         /* check for invalid memory addresses */
424                         if (ftest->k >= BPF_MEMWORDS)
425                                 return -EINVAL;
426                         break;
427 
428                 case BPF_JMP|BPF_JA:
429                         /*
430                          * Note, the large ftest->k might cause loops.
431                          * Compare this with conditional jumps below,
432                          * where offsets are limited. --ANK (981016)
433                          */
434                         if (ftest->k >= (unsigned)(flen-pc-1))
435                                 return -EINVAL;
436                         break;
437 
438                 case BPF_JMP|BPF_JEQ|BPF_K:
439                 case BPF_JMP|BPF_JEQ|BPF_X:
440                 case BPF_JMP|BPF_JGE|BPF_K:
441                 case BPF_JMP|BPF_JGE|BPF_X:
442                 case BPF_JMP|BPF_JGT|BPF_K:
443                 case BPF_JMP|BPF_JGT|BPF_X:
444                 case BPF_JMP|BPF_JSET|BPF_K:
445                 case BPF_JMP|BPF_JSET|BPF_X:
446                         /* for conditionals both must be safe */
447                         if (pc + ftest->jt + 1 >= flen ||
448                             pc + ftest->jf + 1 >= flen)
449                                 return -EINVAL;
450                         break;
451 
452                 default:
453                         return -EINVAL;
454                 }
455         }
456 
457         return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
458 }
459 EXPORT_SYMBOL(sk_chk_filter);
460 
461 /**
462  *      sk_filter_rcu_release: Release a socket filter by rcu_head
463  *      @rcu: rcu_head that contains the sk_filter to free
464  */
465 static void sk_filter_rcu_release(struct rcu_head *rcu)
466 {
467         struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
468 
469         sk_filter_release(fp);
470 }
471 
472 static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
473 {
474         unsigned int size = sk_filter_len(fp);
475 
476         atomic_sub(size, &sk->sk_omem_alloc);
477         call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
478 }
479 
480 /**
481  *      sk_attach_filter - attach a socket filter
482  *      @fprog: the filter program
483  *      @sk: the socket to use
484  *
485  * Attach the user's filter code. We first run some sanity checks on
486  * it to make sure it does not explode on us later. If an error
487  * occurs or there is insufficient memory for the filter a negative
488  * errno code is returned. On success the return is zero.
489  */
490 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
491 {
492         struct sk_filter *fp, *old_fp;
493         unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
494         int err;
495 
496         /* Make sure new filter is there and in the right amounts. */
497         if (fprog->filter == NULL)
498                 return -EINVAL;
499 
500         fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
501         if (!fp)
502                 return -ENOMEM;
503         if (copy_from_user(fp->insns, fprog->filter, fsize)) {
504                 sock_kfree_s(sk, fp, fsize+sizeof(*fp));
505                 return -EFAULT;
506         }
507 
508         atomic_set(&fp->refcnt, 1);
509         fp->len = fprog->len;
510 
511         err = sk_chk_filter(fp->insns, fp->len);
512         if (err) {
513                 sk_filter_uncharge(sk, fp);
514                 return err;
515         }
516 
517         rcu_read_lock_bh();
518         old_fp = rcu_dereference(sk->sk_filter);
519         rcu_assign_pointer(sk->sk_filter, fp);
520         rcu_read_unlock_bh();
521 
522         if (old_fp)
523                 sk_filter_delayed_uncharge(sk, old_fp);
524         return 0;
525 }
526 
527 int sk_detach_filter(struct sock *sk)
528 {
529         int ret = -ENOENT;
530         struct sk_filter *filter;
531 
532         rcu_read_lock_bh();
533         filter = rcu_dereference(sk->sk_filter);
534         if (filter) {
535                 rcu_assign_pointer(sk->sk_filter, NULL);
536                 sk_filter_delayed_uncharge(sk, filter);
537                 ret = 0;
538         }
539         rcu_read_unlock_bh();
540         return ret;
541 }
542 
  This page was automatically generated by the LXR engine.