Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * Copyright (C) 2001 Momchil Velikov
  3  * Portions Copyright (C) 2001 Christoph Hellwig
  4  * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
  5  * Copyright (C) 2006 Nick Piggin
  6  *
  7  * This program is free software; you can redistribute it and/or
  8  * modify it under the terms of the GNU General Public License as
  9  * published by the Free Software Foundation; either version 2, or (at
 10  * your option) any later version.
 11  *
 12  * This program is distributed in the hope that it will be useful, but
 13  * WITHOUT ANY WARRANTY; without even the implied warranty of
 14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 15  * General Public License for more details.
 16  *
 17  * You should have received a copy of the GNU General Public License
 18  * along with this program; if not, write to the Free Software
 19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 20  */
 21 
 22 #include <linux/errno.h>
 23 #include <linux/init.h>
 24 #include <linux/kernel.h>
 25 #include <linux/module.h>
 26 #include <linux/radix-tree.h>
 27 #include <linux/percpu.h>
 28 #include <linux/slab.h>
 29 #include <linux/notifier.h>
 30 #include <linux/cpu.h>
 31 #include <linux/gfp.h>
 32 #include <linux/string.h>
 33 #include <linux/bitops.h>
 34 #include <linux/rcupdate.h>
 35 #include <linux/spinlock.h>
 36 
 37 
 38 #ifdef __KERNEL__
 39 #define RADIX_TREE_MAP_SHIFT    (CONFIG_BASE_SMALL ? 4 : 6)
 40 #else
 41 #define RADIX_TREE_MAP_SHIFT    3       /* For more stressful testing */
 42 #endif
 43 
 44 #define RADIX_TREE_MAP_SIZE     (1UL << RADIX_TREE_MAP_SHIFT)
 45 #define RADIX_TREE_MAP_MASK     (RADIX_TREE_MAP_SIZE-1)
 46 
 47 #define RADIX_TREE_TAG_LONGS    \
 48         ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
 49 
 50 struct radix_tree_node {
 51         unsigned int    height;         /* Height from the bottom */
 52         unsigned int    count;
 53         struct rcu_head rcu_head;
 54         void            *slots[RADIX_TREE_MAP_SIZE];
 55         unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 56 #ifdef CONFIG_RADIX_TREE_CONCURRENT
 57         spinlock_t      lock;
 58 #endif
 59 };
 60 
 61 struct radix_tree_path {
 62         struct radix_tree_node *node;
 63         int offset;
 64 #ifdef CONFIG_RADIX_TREE_CONCURRENT
 65         spinlock_t *locked;
 66 #endif
 67 };
 68 
 69 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
 70 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 71                                           RADIX_TREE_MAP_SHIFT))
 72 
 73 /*
 74  * The height_to_maxindex array needs to be one deeper than the maximum
 75  * path as height 0 holds only 1 entry.
 76  */
 77 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
 78 
 79 #ifdef CONFIG_RADIX_TREE_CONCURRENT
 80 static struct lock_class_key radix_node_class[RADIX_TREE_MAX_PATH + 1];
 81 #endif
 82 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 83 static const char *radix_node_key_string[RADIX_TREE_MAX_PATH + 1] = {
 84         "radix-node-00",
 85         "radix-node-01",
 86         "radix-node-02",
 87         "radix-node-03",
 88         "radix-node-04",
 89         "radix-node-05",
 90         "radix-node-06",
 91         "radix-node-07",
 92         "radix-node-08",
 93         "radix-node-09",
 94         "radix-node-10",
 95         "radix-node-11",
 96         "radix-node-12",
 97         "radix-node-13",
 98         "radix-node-14",
 99         "radix-node-15",
100 };
101 #endif
102 
103 #ifdef CONFIG_RADIX_TREE_OPTIMISTIC
104 static DEFINE_PER_CPU(unsigned long[RADIX_TREE_MAX_PATH+1], optimistic_histogram);
105 
106 static void optimistic_hit(unsigned long height)
107 {
108         unsigned long flags;
109 
110         if (height > RADIX_TREE_MAX_PATH)
111                 height = RADIX_TREE_MAX_PATH;
112 
113         /*
114          * HACK:
115          *  On PREEMPT_RT this protects the percpu var.
116          *  We really need to fix this to pass in cpu var protected
117          *  with a RT sleeping spinlock.
118          */
119         local_irq_save(flags);
120         __get_cpu_var(optimistic_histogram)[height]++;
121         local_irq_restore(flags);
122 }
123 
124 #ifdef CONFIG_PROC_FS
125 
126 #include <linux/seq_file.h>
127 #include <linux/uaccess.h>
128 
129 static void *frag_start(struct seq_file *m, loff_t *pos)
130 {
131         if (*pos < 0 || *pos > RADIX_TREE_MAX_PATH)
132                 return NULL;
133 
134         m->private = (void *)(unsigned long)*pos;
135         return pos;
136 }
137 
138 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
139 {
140         if (*pos < RADIX_TREE_MAX_PATH) {
141                 (*pos)++;
142                 (*((unsigned long *)&m->private))++;
143                 return pos;
144         }
145         return NULL;
146 }
147 
148 static void frag_stop(struct seq_file *m, void *arg)
149 {
150 }
151 
152 unsigned long get_optimistic_stat(unsigned long index)
153 {
154         unsigned long total = 0;
155         int cpu;
156 
157         for_each_possible_cpu(cpu) {
158                 total += per_cpu(optimistic_histogram, cpu)[index];
159         }
160         return total;
161 }
162 
163 static int frag_show(struct seq_file *m, void *arg)
164 {
165         unsigned long index = (unsigned long)m->private;
166         unsigned long hits = get_optimistic_stat(index);
167 
168         if (index == 0)
169                 seq_printf(m, "levels skipped\thits\n");
170 
171         if (index < RADIX_TREE_MAX_PATH)
172                 seq_printf(m, "%9lu\t%9lu\n", index, hits);
173         else
174                 seq_printf(m, "failed\t%9lu\n", hits);
175 
176         return 0;
177 }
178 
179 struct seq_operations optimistic_op = {
180         .start = frag_start,
181         .next = frag_next,
182         .stop = frag_stop,
183         .show = frag_show,
184 };
185 
186 static void optimistic_reset(void)
187 {
188         int cpu;
189         int height;
190         for_each_possible_cpu(cpu) {
191                 for (height = 0; height <= RADIX_TREE_MAX_PATH; height++)
192                         per_cpu(optimistic_histogram, cpu)[height] = 0;
193         }
194 }
195 
196 ssize_t optimistic_write(struct file *file, const char __user *buf,
197                 size_t count, loff_t *ppos)
198 {
199         if (count) {
200                 char c;
201                 if (get_user(c, buf))
202                         return -EFAULT;
203                 if (c == '')
204                         optimistic_reset();
205         }
206         return count;
207 }
208 
209 #endif // CONFIG_PROC_FS
210 #endif // CONFIG_RADIX_TREE_OPTIMISTIC
211 
212 /*
213  * Radix tree node cache.
214  */
215 static struct kmem_cache *radix_tree_node_cachep;
216 
217 /*
218  * Per-cpu pool of preloaded nodes
219  */
220 struct radix_tree_preload {
221         int nr;
222         struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
223 };
224 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
225 
226 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
227 {
228         return root->gfp_mask & __GFP_BITS_MASK;
229 }
230 
231 /*
232  * This assumes that the caller has performed appropriate preallocation, and
233  * that the caller has pinned this thread of control to the current CPU.
234  */
235 static struct radix_tree_node *
236 radix_tree_node_alloc(struct radix_tree_root *root, int height)
237 {
238         struct radix_tree_node *ret = NULL;
239         gfp_t gfp_mask = root_gfp_mask(root);
240 
241         if (!(gfp_mask & __GFP_WAIT)) {
242                 struct radix_tree_preload *rtp;
243 
244                 /*
245                  * Provided the caller has preloaded here, we will always
246                  * succeed in getting a node here (and never reach
247                  * kmem_cache_alloc)
248                  */
249                 rtp = &get_cpu_var(radix_tree_preloads);
250                 if (rtp->nr) {
251                         ret = rtp->nodes[rtp->nr - 1];
252                         rtp->nodes[rtp->nr - 1] = NULL;
253                         rtp->nr--;
254                 }
255                 put_cpu_var(radix_tree_preloads);
256         }
257         if (ret == NULL)
258                 ret = kmem_cache_alloc(radix_tree_node_cachep,
259                                 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
260 
261         BUG_ON(radix_tree_is_indirect_ptr(ret));
262 #ifdef CONFIG_RADIX_TREE_CONCURRENT
263         spin_lock_init(&ret->lock);
264         lockdep_set_class_and_name(&ret->lock,
265                         &radix_node_class[height],
266                         radix_node_key_string[height]);
267 #endif
268         ret->height = height;
269         return ret;
270 }
271 
272 static void radix_tree_node_rcu_free(struct rcu_head *head)
273 {
274         struct radix_tree_node *node =
275                         container_of(head, struct radix_tree_node, rcu_head);
276         kmem_cache_free(radix_tree_node_cachep, node);
277 }
278 
279 static inline void
280 radix_tree_node_free(struct radix_tree_node *node)
281 {
282         call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
283 }
284 
285 #ifndef CONFIG_PREEMPT_RT
286 
287 /*
288  * Load up this CPU's radix_tree_node buffer with sufficient objects to
289  * ensure that the addition of a single element in the tree cannot fail.  On
290  * success, return zero, with preemption disabled.  On error, return -ENOMEM
291  * with preemption not disabled.
292  */
293 int radix_tree_preload(gfp_t gfp_mask)
294 {
295         struct radix_tree_preload *rtp;
296         struct radix_tree_node *node;
297         int ret = -ENOMEM;
298 
299         preempt_disable();
300         rtp = &__get_cpu_var(radix_tree_preloads);
301         while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
302                 preempt_enable();
303                 node = kmem_cache_alloc(radix_tree_node_cachep,
304                                 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
305                 if (node == NULL)
306                         goto out;
307                 preempt_disable();
308                 rtp = &__get_cpu_var(radix_tree_preloads);
309                 if (rtp->nr < ARRAY_SIZE(rtp->nodes))
310                         rtp->nodes[rtp->nr++] = node;
311                 else
312                         kmem_cache_free(radix_tree_node_cachep, node);
313         }
314         ret = 0;
315 out:
316         return ret;
317 }
318 EXPORT_SYMBOL(radix_tree_preload);
319 
320 #endif
321 
322 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
323                 int offset)
324 {
325         __set_bit(offset, node->tags[tag]);
326 }
327 
328 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
329                 int offset)
330 {
331         __clear_bit(offset, node->tags[tag]);
332 }
333 
334 static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
335                 int offset)
336 {
337         return test_bit(offset, node->tags[tag]);
338 }
339 
340 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
341 {
342         root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
343 }
344 
345 
346 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
347 {
348         root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
349 }
350 
351 static inline void root_tag_clear_all(struct radix_tree_root *root)
352 {
353         root->gfp_mask &= __GFP_BITS_MASK;
354 }
355 
356 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
357 {
358         return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
359 }
360 
361 /*
362  * Returns 1 if any slot in the node has this tag set.
363  * Otherwise returns 0.
364  */
365 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
366 {
367         int idx;
368         for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
369                 if (node->tags[tag][idx])
370                         return 1;
371         }
372         return 0;
373 }
374 
375 static inline int any_tag_set_but(struct radix_tree_node *node,
376                 unsigned int tag, int offset)
377 {
378         int idx;
379         int offset_idx = offset / BITS_PER_LONG;
380         unsigned long offset_mask = ~(1UL << (offset % BITS_PER_LONG));
381         for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
382                 unsigned long mask = ~0UL;
383                 if (idx == offset_idx)
384                         mask = offset_mask;
385                 if (node->tags[tag][idx] & mask)
386                         return 1;
387         }
388         return 0;
389 }
390 
391 /*
392  *      Return the maximum key which can be store into a
393  *      radix tree with height HEIGHT.
394  */
395 static inline unsigned long radix_tree_maxindex(unsigned int height)
396 {
397         return height_to_maxindex[height];
398 }
399 
400 /*
401  *      Extend a radix tree so it can store key @index.
402  */
403 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
404 {
405         struct radix_tree_node *node;
406         unsigned int height;
407         int tag;
408 
409         /* Figure out what the height should be.  */
410         height = root->height + 1;
411         while (index > radix_tree_maxindex(height))
412                 height++;
413 
414         if (root->rnode == NULL) {
415                 root->height = height;
416                 goto out;
417         }
418 
419         do {
420                 unsigned int newheight = root->height + 1;
421                 if (!(node = radix_tree_node_alloc(root, newheight)))
422                         return -ENOMEM;
423 
424                 /* Increase the height.  */
425                 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
426 
427                 /* Propagate the aggregated tag info into the new root */
428                 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
429                         if (root_tag_get(root, tag))
430                                 tag_set(node, tag, 0);
431                 }
432 
433                 node->count = 1;
434                 node = radix_tree_ptr_to_indirect(node);
435                 rcu_assign_pointer(root->rnode, node);
436                 root->height = newheight;
437         } while (height > root->height);
438 out:
439         return 0;
440 }
441 
442 #ifdef CONFIG_RADIX_TREE_CONCURRENT
443 static inline struct radix_tree_context *
444 radix_tree_get_context(struct radix_tree_root **rootp)
445 {
446         struct radix_tree_context *context = NULL;
447         unsigned long addr = (unsigned long)*rootp;
448 
449         if (addr & 1) {
450                 context = (struct radix_tree_context *)(addr - 1);
451                 *rootp = context->root;
452         }
453 
454         return context;
455 }
456 
457 #define RADIX_TREE_CONTEXT(context, root) \
458         struct radix_tree_context *context =    \
459                 radix_tree_get_context(&root)
460 
461 static inline spinlock_t *radix_node_lock(struct radix_tree_root *root,
462                 struct radix_tree_node *node)
463 {
464         spinlock_t *locked = &node->lock;
465         spin_lock(locked);
466         return locked;
467 }
468 
469 static inline void radix_ladder_lock(struct radix_tree_context *context,
470                 struct radix_tree_node *node)
471 {
472         if (context) {
473                 struct radix_tree_root *root = context->root;
474                 spinlock_t *locked = radix_node_lock(root, node);
475                 if (locked) {
476                         spin_unlock(context->locked);
477                         context->locked = locked;
478                 }
479         }
480 }
481 
482 static inline void radix_path_init(struct radix_tree_context *context,
483                 struct radix_tree_path *pathp)
484 {
485         pathp->locked = context ? context->locked : NULL;
486 }
487 
488 static inline void radix_path_lock(struct radix_tree_context *context,
489                 struct radix_tree_path *pathp, struct radix_tree_node *node)
490 {
491         if (context) {
492                 struct radix_tree_root *root = context->root;
493                 spinlock_t *locked = radix_node_lock(root, node);
494                 if (locked)
495                         context->locked = locked;
496                 pathp->locked = locked;
497         } else
498                 pathp->locked = NULL;
499 }
500 
501 static inline void radix_path_unlock(struct radix_tree_context *context,
502                 struct radix_tree_path *punlock)
503 {
504         if (context && punlock->locked &&
505                         context->locked != punlock->locked)
506                 spin_unlock(punlock->locked);
507 }
508 #else
509 #define RADIX_TREE_CONTEXT(context, root) do { } while (0)
510 #define radix_ladder_lock(context, node) do { } while (0)
511 #define radix_path_init(context, pathp) do { } while (0)
512 #define radix_path_lock(context, pathp, node) do { } while (0)
513 #define radix_path_unlock(context, punlock) do { } while (0)
514 #endif
515 
516 #ifdef CONFIG_RADIX_TREE_OPTIMISTIC
517 typedef int (*radix_valid_fn)(struct radix_tree_node *, int, int);
518 
519 static struct radix_tree_node *
520 radix_optimistic_lookup(struct radix_tree_context *context, unsigned long index,
521                 int tag, radix_valid_fn valid)
522 {
523         unsigned int height, shift;
524         struct radix_tree_node *node, *ret = NULL, **slot;
525         struct radix_tree_root *root = context->root;
526 
527         node = rcu_dereference(root->rnode);
528         if (node == NULL)
529                 return NULL;
530 
531         if (!radix_tree_is_indirect_ptr(node))
532                         return NULL;
533 
534         node = radix_tree_indirect_to_ptr(node);
535 
536         height = node->height;
537         if (index > radix_tree_maxindex(height))
538                 return NULL;
539 
540         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
541         do {
542                 int offset = (index >> shift) & RADIX_TREE_MAP_MASK;
543                 if ((*valid)(node, offset, tag))
544                         ret = node;
545                 slot = (struct radix_tree_node **)(node->slots + offset);
546                 node = rcu_dereference(*slot);
547                 if (!node)
548                         break;
549 
550                 shift -= RADIX_TREE_MAP_SHIFT;
551                 height--;
552         } while (height > 0);
553 
554         return ret;
555 }
556 
557 static struct radix_tree_node *
558 __radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
559                 int tag, radix_valid_fn valid)
560 {
561         struct radix_tree_node *node;
562         spinlock_t *locked;
563         unsigned int shift, offset;
564 
565         node = radix_optimistic_lookup(context, index, tag, valid);
566         if (!node)
567                 goto out;
568 
569         locked = radix_node_lock(context->root, node);
570         if (!locked)
571                 goto out;
572 
573 #if 0
574         if (node != radix_optimistic_lookup(context, index, tag, valid))
575                 goto out_unlock;
576 #else
577         /* check if the node got freed */
578         if (!node->count)
579                 goto out_unlock;
580 
581         /* check if the node is still a valid termination point */
582         shift = (node->height - 1) * RADIX_TREE_MAP_SHIFT;
583         offset = (index >> shift) & RADIX_TREE_MAP_MASK;
584         if (!(*valid)(node, offset, tag))
585                 goto out_unlock;
586 #endif
587 
588         context->locked = locked;
589         return node;
590 
591 out_unlock:
592         spin_unlock(locked);
593 out:
594         return NULL;
595 }
596 
597 static struct radix_tree_node *
598 radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
599                 int tag, radix_valid_fn valid)
600 {
601         struct radix_tree_node *node = NULL;
602 
603         if (context) {
604                 node = __radix_optimistic_lock(context, index, tag, valid);
605                 if (!node) {
606                         BUG_ON(context->locked);
607                         spin_lock(&context->root->lock);
608                         context->locked = &context->root->lock;
609                         optimistic_hit(RADIX_TREE_MAX_PATH);
610                 } else
611                         optimistic_hit(context->root->height - node->height);
612         }
613         return node;
614 }
615 
616 static int radix_valid_always(struct radix_tree_node *node, int offset, int tag)
617 {
618         return 1;
619 }
620 
621 static int radix_valid_tag(struct radix_tree_node *node, int offset, int tag)
622 {
623         return tag_get(node, tag, offset);
624 }
625 #else
626 #define radix_optimistic_lock(context, index, tag, valid) NULL
627 #endif
628 
629 /**
630  *      radix_tree_insert    -    insert into a radix tree
631  *      @root:          radix tree root
632  *      @index:         index key
633  *      @item:          item to insert
634  *
635  *      Insert an item into the radix tree at position @index.
636  */
637 int radix_tree_insert(struct radix_tree_root *root,
638                         unsigned long index, void *item)
639 {
640         struct radix_tree_node *node = NULL, *slot;
641         unsigned int height, shift;
642         int offset;
643         int error;
644         int tag;
645         RADIX_TREE_CONTEXT(context, root);
646 
647         BUG_ON(radix_tree_is_indirect_ptr(item));
648 
649         node = radix_optimistic_lock(context, index, 0, radix_valid_always);
650         if (node) {
651                 height = node->height;
652                 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
653                 goto optimistic;
654         }
655 
656         /* Make sure the tree is high enough.  */
657         if (index > radix_tree_maxindex(root->height)) {
658                 error = radix_tree_extend(root, index);
659                 if (error)
660                         return error;
661         }
662 
663         slot = radix_tree_indirect_to_ptr(root->rnode);
664         height = root->height;
665         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
666 
667         offset = 0;                     /* uninitialised var warning */
668         while (height > 0) {
669                 if (slot == NULL) {
670                         /* Have to add a child node.  */
671                         if (!(slot = radix_tree_node_alloc(root, height)))
672                                 return -ENOMEM;
673                         if (node) {
674                                 rcu_assign_pointer(node->slots[offset], slot);
675                                 node->count++;
676                         } else
677                                 rcu_assign_pointer(root->rnode,
678                                         radix_tree_ptr_to_indirect(slot));
679                 }
680 
681                 /* Go a level down */
682                 node = slot;
683                 radix_ladder_lock(context, node);
684 
685 optimistic:
686                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
687                 slot = node->slots[offset];
688                 shift -= RADIX_TREE_MAP_SHIFT;
689                 height--;
690         }
691 
692         if (slot != NULL)
693                 return -EEXIST;
694 
695         if (node) {
696                 node->count++;
697                 rcu_assign_pointer(node->slots[offset], item);
698                 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
699                         BUG_ON(tag_get(node, tag, offset));
700         } else {
701                 rcu_assign_pointer(root->rnode, item);
702                 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
703                         BUG_ON(root_tag_get(root, tag));
704         }
705 
706         return 0;
707 }
708 EXPORT_SYMBOL(radix_tree_insert);
709 
710 /**
711  *      radix_tree_lookup_slot    -    lookup a slot in a radix tree
712  *      @root:          radix tree root
713  *      @index:         index key
714  *
715  *      Returns:  the slot corresponding to the position @index in the
716  *      radix tree @root. This is useful for update-if-exists operations.
717  *
718  *      This function can be called under rcu_read_lock iff the slot is not
719  *      modified by radix_tree_replace_slot, otherwise it must be called
720  *      exclusive from other writers. Any dereference of the slot must be done
721  *      using radix_tree_deref_slot.
722  */
723 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
724 {
725         unsigned int height, shift;
726         struct radix_tree_node *node, **slot;
727         RADIX_TREE_CONTEXT(context, root);
728 
729         node = radix_optimistic_lock(context, index, 0, radix_valid_always);
730         if (node)
731                 goto optimistic;
732 
733         node = rcu_dereference(root->rnode);
734         if (node == NULL)
735                 return NULL;
736 
737         if (!radix_tree_is_indirect_ptr(node)) {
738                 if (index > 0)
739                         return NULL;
740                 return (void **)&root->rnode;
741         }
742         node = radix_tree_indirect_to_ptr(node);
743 
744 optimistic:
745         height = node->height;
746         if (index > radix_tree_maxindex(height))
747                 return NULL;
748 
749         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
750 
751         do {
752                 slot = (struct radix_tree_node **)
753                         (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
754                 node = rcu_dereference(*slot);
755                 if (node == NULL)
756                         return NULL;
757 
758                 radix_ladder_lock(context, node);
759 
760                 shift -= RADIX_TREE_MAP_SHIFT;
761                 height--;
762         } while (height > 0);
763 
764         return (void **)slot;
765 }
766 EXPORT_SYMBOL(radix_tree_lookup_slot);
767 
768 /**
769  *      radix_tree_lookup    -    perform lookup operation on a radix tree
770  *      @root:          radix tree root
771  *      @index:         index key
772  *
773  *      Lookup the item at the position @index in the radix tree @root.
774  *
775  *      This function can be called under rcu_read_lock, however the caller
776  *      must manage lifetimes of leaf nodes (eg. RCU may also be used to free
777  *      them safely). No RCU barriers are required to access or modify the
778  *      returned item, however.
779  */
780 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
781 {
782         unsigned int height, shift;
783         struct radix_tree_node *node, **slot;
784 
785         node = rcu_dereference(root->rnode);
786         if (node == NULL)
787                 return NULL;
788 
789         if (!radix_tree_is_indirect_ptr(node)) {
790                 if (index > 0)
791                         return NULL;
792                 return node;
793         }
794         node = radix_tree_indirect_to_ptr(node);
795 
796         height = node->height;
797         if (index > radix_tree_maxindex(height))
798                 return NULL;
799 
800         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
801 
802         do {
803                 slot = (struct radix_tree_node **)
804                         (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
805                 node = rcu_dereference(*slot);
806                 if (node == NULL)
807                         return NULL;
808 
809                 shift -= RADIX_TREE_MAP_SHIFT;
810                 height--;
811         } while (height > 0);
812 
813         return node;
814 }
815 EXPORT_SYMBOL(radix_tree_lookup);
816 
817 /**
818  *      radix_tree_tag_set - set a tag on a radix tree node
819  *      @root:          radix tree root
820  *      @index:         index key
821  *      @tag:           tag index
822  *
823  *      Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
824  *      corresponding to @index in the radix tree.  From
825  *      the root all the way down to the leaf node.
826  *
827  *      Returns the address of the tagged item.   Setting a tag on a not-present
828  *      item is a bug.
829  */
830 void *radix_tree_tag_set(struct radix_tree_root *root,
831                         unsigned long index, unsigned int tag)
832 {
833         unsigned int height, shift;
834         struct radix_tree_node *slot;
835         RADIX_TREE_CONTEXT(context, root);
836 
837         slot = radix_optimistic_lock(context, index, tag, radix_valid_tag);
838         if (slot) {
839                 height = slot->height;
840                 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
841                 goto optimistic;
842         }
843 
844         height = root->height;
845         BUG_ON(index > radix_tree_maxindex(height));
846 
847         slot = radix_tree_indirect_to_ptr(root->rnode);
848         shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
849 
850         /* set the root's tag bit */
851         if (slot && !root_tag_get(root, tag))
852                 root_tag_set(root, tag);
853 
854         while (height > 0) {
855                 int offset;
856 
857                 radix_ladder_lock(context, slot);
858 
859 optimistic:
860                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
861                 if (!tag_get(slot, tag, offset))
862                         tag_set(slot, tag, offset);
863                 slot = slot->slots[offset];
864                 BUG_ON(slot == NULL);
865                 shift -= RADIX_TREE_MAP_SHIFT;
866                 height--;
867         }
868 
869         return slot;
870 }
871 EXPORT_SYMBOL(radix_tree_tag_set);
872 
873 /*
874  * the change can never propagate upwards from here.
875  */
876 static
877 int radix_valid_tag_clear(struct radix_tree_node *node, int offset, int tag)
878 {
879         int this, other;
880 
881         this = tag_get(node, tag, offset);
882         other = any_tag_set_but(node, tag, offset);
883 
884         return !this || other;
885 }
886 
887 /**
888  *      radix_tree_tag_clear - clear a tag on a radix tree node
889  *      @root:          radix tree root
890  *      @index:         index key
891  *      @tag:           tag index
892  *
893  *      Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
894  *      corresponding to @index in the radix tree.  If
895  *      this causes the leaf node to have no tags set then clear the tag in the
896  *      next-to-leaf node, etc.
897  *
898  *      Returns the address of the tagged item on success, else NULL.  ie:
899  *      has the same return value and semantics as radix_tree_lookup().
900  */
901 void *radix_tree_tag_clear(struct radix_tree_root *root,
902                         unsigned long index, unsigned int tag)
903 {
904         /*
905          * The radix tree path needs to be one longer than the maximum path
906          * since the "list" is null terminated.
907          */
908         struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
909         struct radix_tree_path *punlock = path, *piter;
910         struct radix_tree_node *slot = NULL;
911         unsigned int height, shift, offset;
912 
913         RADIX_TREE_CONTEXT(context, root);
914 
915         slot = radix_optimistic_lock(context, index, tag,
916                         radix_valid_tag_clear);
917         if (slot) {
918                 height = slot->height;
919                 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
920                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
921                 pathp->offset = offset;
922                 pathp->node = slot;
923                 radix_path_init(context, pathp);
924                 goto optimistic;
925         }
926 
927         pathp->node = NULL;
928         radix_path_init(context, pathp);
929 
930         height = root->height;
931         if (index > radix_tree_maxindex(height))
932                 goto out;
933 
934         shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
935         slot = radix_tree_indirect_to_ptr(root->rnode);
936 
937         while (height > 0) {
938                 if (slot == NULL)
939                         goto out;
940 
941                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
942                 pathp++;
943                 pathp->offset = offset;
944                 pathp->node = slot;
945                 radix_path_lock(context, pathp, slot);
946 
947                 if (radix_valid_tag_clear(slot, offset, tag)) {
948                         for (; punlock < pathp; punlock++)
949                                 radix_path_unlock(context, punlock);
950                 }
951 
952 optimistic:
953                 slot = slot->slots[offset];
954                 shift -= RADIX_TREE_MAP_SHIFT;
955                 height--;
956         }
957 
958         if (slot == NULL)
959                 goto out;
960 
961         for (piter = pathp; piter >= punlock; piter--) {
962                 if (piter->node) {
963                         if (!tag_get(piter->node, tag, piter->offset))
964                                 break;
965                         tag_clear(piter->node, tag, piter->offset);
966                         if (any_tag_set(piter->node, tag))
967                                 break;
968                 } else {
969                         if (root_tag_get(root, tag))
970                                 root_tag_clear(root, tag);
971                 }
972         }
973 
974 out:
975         for (; punlock < pathp; punlock++)
976                 radix_path_unlock(context, punlock);
977         return slot;
978 }
979 EXPORT_SYMBOL(radix_tree_tag_clear);
980 
981 #ifndef __KERNEL__      /* Only the test harness uses this at present */
982 /**
983  * radix_tree_tag_get - get a tag on a radix tree node
984  * @root:               radix tree root
985  * @index:              index key
986  * @tag:                tag index (< RADIX_TREE_MAX_TAGS)
987  *
988  * Return values:
989  *
990  *  0: tag not present or not set
991  *  1: tag set
992  */
993 int radix_tree_tag_get(struct radix_tree_root *root,
994                         unsigned long index, unsigned int tag)
995 {
996         unsigned int height, shift;
997         struct radix_tree_node *node;
998         int saw_unset_tag = 0;
999 
1000         /* check the root's tag bit */
1001         if (!root_tag_get(root, tag))
1002                 return 0;
1003 
1004         node = rcu_dereference(root->rnode);
1005         if (node == NULL)
1006                 return 0;
1007 
1008         if (!radix_tree_is_indirect_ptr(node))
1009                 return (index == 0);
1010         node = radix_tree_indirect_to_ptr(node);
1011 
1012         height = node->height;
1013         if (index > radix_tree_maxindex(height))
1014                 return 0;
1015 
1016         shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1017 
1018         for ( ; ; ) {
1019                 int offset;
1020 
1021                 if (node == NULL)
1022                         return 0;
1023 
1024                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1025 
1026                 /*
1027                  * This is just a debug check.  Later, we can bale as soon as
1028                  * we see an unset tag.
1029                  */
1030                 if (!tag_get(node, tag, offset))
1031                         saw_unset_tag = 1;
1032                 if (height == 1) {
1033                         int ret = tag_get(node, tag, offset);
1034 
1035                         BUG_ON(ret && saw_unset_tag);
1036                         return !!ret;
1037                 }
1038                 node = rcu_dereference(node->slots[offset]);
1039                 shift -= RADIX_TREE_MAP_SHIFT;
1040                 height--;
1041         }
1042 }
1043 EXPORT_SYMBOL(radix_tree_tag_get);
1044 #endif
1045 
1046 /**
1047  *      radix_tree_next_hole    -    find the next hole (not-present entry)
1048  *      @root:          tree root
1049  *      @index:         index key
1050  *      @max_scan:      maximum range to search
1051  *
1052  *      Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
1053  *      indexed hole.
1054  *
1055  *      Returns: the index of the hole if found, otherwise returns an index
1056  *      outside of the set specified (in which case 'return - index >= max_scan'
1057  *      will be true).
1058  *
1059  *      radix_tree_next_hole may be called under rcu_read_lock. However, like
1060  *      radix_tree_gang_lookup, this will not atomically search a snapshot of the
1061  *      tree at a single point in time. For example, if a hole is created at index
1062  *      5, then subsequently a hole is created at index 10, radix_tree_next_hole
1063  *      covering both indexes may return 10 if called under rcu_read_lock.
1064  */
1065 unsigned long radix_tree_next_hole(struct radix_tree_root *root,
1066                                 unsigned long index, unsigned long max_scan)
1067 {
1068         unsigned long i;
1069 
1070         for (i = 0; i < max_scan; i++) {
1071                 if (!radix_tree_lookup(root, index))
1072                         break;
1073                 index++;
1074                 if (index == 0)
1075                         break;
1076         }
1077 
1078         return index;
1079 }
1080 EXPORT_SYMBOL(radix_tree_next_hole);
1081 
1082 static unsigned int
1083 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
1084         unsigned int max_items, unsigned long *next_index)
1085 {
1086         unsigned int nr_found = 0;
1087         unsigned int shift, height;
1088         unsigned long i;
1089 
1090         height = slot->height;
1091         if (height == 0)
1092                 goto out;
1093         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1094 
1095         for ( ; height > 1; height--) {
1096                 i = (index >> shift) & RADIX_TREE_MAP_MASK;
1097                 for (;;) {
1098                         if (slot->slots[i] != NULL)
1099                                 break;
1100                         index &= ~((1UL << shift) - 1);
1101                         index += 1UL << shift;
1102                         if (index == 0)
1103                                 goto out;       /* 32-bit wraparound */
1104                         i++;
1105                         if (i == RADIX_TREE_MAP_SIZE)
1106                                 goto out;
1107                 }
1108 
1109                 shift -= RADIX_TREE_MAP_SHIFT;
1110                 slot = rcu_dereference(slot->slots[i]);
1111                 if (slot == NULL)
1112                         goto out;
1113         }
1114 
1115         /* Bottom level: grab some items */
1116         for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
1117                 index++;
1118                 if (slot->slots[i]) {
1119                         results[nr_found++] = &(slot->slots[i]);
1120                         if (nr_found == max_items)
1121                                 goto out;
1122                 }
1123         }
1124 out:
1125         *next_index = index;
1126         return nr_found;
1127 }
1128 
1129 /**
1130  *      radix_tree_gang_lookup - perform multiple lookup on a radix tree
1131  *      @root:          radix tree root
1132  *      @results:       where the results of the lookup are placed
1133  *      @first_index:   start the lookup from this key
1134  *      @max_items:     place up to this many items at *results
1135  *
1136  *      Performs an index-ascending scan of the tree for present items.  Places
1137  *      them at *@results and returns the number of items which were placed at
1138  *      *@results.
1139  *
1140  *      The implementation is naive.
1141  *
1142  *      Like radix_tree_lookup, radix_tree_gang_lookup may be called under
1143  *      rcu_read_lock. In this case, rather than the returned results being
1144  *      an atomic snapshot of the tree at a single point in time, the semantics
1145  *      of an RCU protected gang lookup are as though multiple radix_tree_lookups
1146  *      have been issued in individual locks, and results stored in 'results'.
1147  */
1148 unsigned int
1149 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
1150                         unsigned long first_index, unsigned int max_items)
1151 {
1152         unsigned long max_index;
1153         struct radix_tree_node *node;
1154         unsigned long cur_index = first_index;
1155         unsigned int ret;
1156 
1157         node = rcu_dereference(root->rnode);
1158         if (!node)
1159                 return 0;
1160 
1161         if (!radix_tree_is_indirect_ptr(node)) {
1162                 if (first_index > 0)
1163                         return 0;
1164                 results[0] = node;
1165                 return 1;
1166         }
1167         node = radix_tree_indirect_to_ptr(node);
1168 
1169         max_index = radix_tree_maxindex(node->height);
1170 
1171         ret = 0;
1172         while (ret < max_items) {
1173                 unsigned int nr_found, slots_found, i;
1174                 unsigned long next_index;       /* Index of next search */
1175 
1176                 if (cur_index > max_index)
1177                         break;
1178                 slots_found = __lookup(node, (void ***)results + ret, cur_index,
1179                                         max_items - ret, &next_index);
1180                 nr_found = 0;
1181                 for (i = 0; i < slots_found; i++) {
1182                         struct radix_tree_node *slot;
1183                         slot = *(((void ***)results)[ret + i]);
1184                         if (!slot)
1185                                 continue;
1186                         results[ret + nr_found] = rcu_dereference(slot);
1187                         nr_found++;
1188                 }
1189                 ret += nr_found;
1190                 if (next_index == 0)
1191                         break;
1192                 cur_index = next_index;
1193         }
1194 
1195         return ret;
1196 }
1197 EXPORT_SYMBOL(radix_tree_gang_lookup);
1198 
1199 /**
1200  *      radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
1201  *      @root:          radix tree root
1202  *      @results:       where the results of the lookup are placed
1203  *      @first_index:   start the lookup from this key
1204  *      @max_items:     place up to this many items at *results
1205  *
1206  *      Performs an index-ascending scan of the tree for present items.  Places
1207  *      their slots at *@results and returns the number of items which were
1208  *      placed at *@results.
1209  *
1210  *      The implementation is naive.
1211  *
1212  *      Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
1213  *      be dereferenced with radix_tree_deref_slot, and if using only RCU
1214  *      protection, radix_tree_deref_slot may fail requiring a retry.
1215  */
1216 unsigned int
1217 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
1218                         unsigned long first_index, unsigned int max_items)
1219 {
1220         unsigned long max_index;
1221         struct radix_tree_node *node;
1222         unsigned long cur_index = first_index;
1223         unsigned int ret;
1224 
1225         node = rcu_dereference(root->rnode);
1226         if (!node)
1227                 return 0;
1228 
1229         if (!radix_tree_is_indirect_ptr(node)) {
1230                 if (first_index > 0)
1231                         return 0;
1232                 results[0] = (void **)&root->rnode;
1233                 return 1;
1234         }
1235         node = radix_tree_indirect_to_ptr(node);
1236 
1237         max_index = radix_tree_maxindex(node->height);
1238 
1239         ret = 0;
1240         while (ret < max_items) {
1241                 unsigned int slots_found;
1242                 unsigned long next_index;       /* Index of next search */
1243 
1244                 if (cur_index > max_index)
1245                         break;
1246                 slots_found = __lookup(node, results + ret, cur_index,
1247                                         max_items - ret, &next_index);
1248                 ret += slots_found;
1249                 if (next_index == 0)
1250                         break;
1251                 cur_index = next_index;
1252         }
1253 
1254         return ret;
1255 }
1256 EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
1257 
1258 /*
1259  * FIXME: the two tag_get()s here should use find_next_bit() instead of
1260  * open-coding the search.
1261  */
1262 static unsigned int
1263 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
1264         unsigned int max_items, unsigned long *next_index, unsigned int tag)
1265 {
1266         unsigned int nr_found = 0;
1267         unsigned int shift, height;
1268 
1269         height = slot->height;
1270         if (height == 0)
1271                 goto out;
1272         shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1273 
1274         while (height > 0) {
1275                 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
1276 
1277                 for (;;) {
1278                         if (tag_get(slot, tag, i))
1279                                 break;
1280                         index &= ~((1UL << shift) - 1);
1281                         index += 1UL << shift;
1282                         if (index == 0)
1283                                 goto out;       /* 32-bit wraparound */
1284                         i++;
1285                         if (i == RADIX_TREE_MAP_SIZE)
1286                                 goto out;
1287                 }
1288                 height--;
1289                 if (height == 0) {      /* Bottom level: grab some items */
1290                         unsigned long j = index & RADIX_TREE_MAP_MASK;
1291 
1292                         for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
1293                                 struct radix_tree_node *node;
1294                                 index++;
1295                                 if (!tag_get(slot, tag, j))
1296                                         continue;
1297                                 node = slot->slots[j];
1298                                 /*
1299                                  * Even though the tag was found set, we need to
1300                                  * recheck that we have a non-NULL node, because
1301                                  * if this lookup is lockless, it may have been
1302                                  * subsequently deleted.
1303                                  *
1304                                  * Similar care must be taken in any place that
1305                                  * lookup ->slots[x] without a lock (ie. can't
1306                                  * rely on its value remaining the same).
1307                                  */
1308                                 if (slot->slots[j]) {
1309                                         results[nr_found++] = &slot->slots[j];
1310                                         if (nr_found == max_items)
1311                                                 goto out;
1312                                 }
1313                         }
1314                 }
1315                 shift -= RADIX_TREE_MAP_SHIFT;
1316                 slot = rcu_dereference(slot->slots[i]);
1317                 if (slot == NULL)
1318                         break;
1319         }
1320 out:
1321         *next_index = index;
1322         return nr_found;
1323 }
1324 
1325 /**
1326  *      radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
1327  *                                   based on a tag
1328  *      @root:          radix tree root
1329  *      @results:       where the results of the lookup are placed
1330  *      @first_index:   start the lookup from this key
1331  *      @max_items:     place up to this many items at *results
1332  *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
1333  *
1334  *      Performs an index-ascending scan of the tree for present items which
1335  *      have the tag indexed by @tag set.  Places the items at *@results and
1336  *      returns the number of items which were placed at *@results.
1337  */
1338 unsigned int
1339 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1340                 unsigned long first_index, unsigned int max_items,
1341                 unsigned int tag)
1342 {
1343         struct radix_tree_node *node;
1344         unsigned long max_index;
1345         unsigned long cur_index = first_index;
1346         unsigned int ret;
1347 
1348         /* check the root's tag bit */
1349         if (!root_tag_get(root, tag))
1350                 return 0;
1351 
1352         node = rcu_dereference(root->rnode);
1353         if (!node)
1354                 return 0;
1355 
1356         if (!radix_tree_is_indirect_ptr(node)) {
1357                 if (first_index > 0)
1358                         return 0;
1359                 results[0] = node;
1360                 return 1;
1361         }
1362         node = radix_tree_indirect_to_ptr(node);
1363 
1364         max_index = radix_tree_maxindex(node->height);
1365 
1366         ret = 0;
1367         while (ret < max_items) {
1368                 unsigned int slots_found, nr_found, i;
1369                 unsigned long next_index;       /* Index of next search */
1370 
1371                 if (cur_index > max_index)
1372                         break;
1373                 slots_found = __lookup_tag(node, (void ***)results + ret,
1374                                 cur_index, max_items - ret, &next_index, tag);
1375                 nr_found = 0;
1376                 for (i = 0; i < slots_found; i++) {
1377                         struct radix_tree_node *slot;
1378                         slot = *((void ***)results)[ret + i];
1379                         if (!slot)
1380                                 continue;
1381                         results[ret + nr_found] = rcu_dereference(slot);
1382                         nr_found++;
1383                 }
1384                 ret += nr_found;
1385                 if (next_index == 0)
1386                         break;
1387                 cur_index = next_index;
1388         }
1389 
1390         return ret;
1391 }
1392 EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
1393 
1394 /**
1395  *      radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
1396  *                                        radix tree based on a tag
1397  *      @root:          radix tree root
1398  *      @results:       where the results of the lookup are placed
1399  *      @first_index:   start the lookup from this key
1400  *      @max_items:     place up to this many items at *results
1401  *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
1402  *
1403  *      Performs an index-ascending scan of the tree for present items which
1404  *      have the tag indexed by @tag set.  Places the slots at *@results and
1405  *      returns the number of slots which were placed at *@results.
1406  */
1407 unsigned int
1408 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1409                 unsigned long first_index, unsigned int max_items,
1410                 unsigned int tag)
1411 {
1412         struct radix_tree_node *node;
1413         unsigned long max_index;
1414         unsigned long cur_index = first_index;
1415         unsigned int ret;
1416 
1417         /* check the root's tag bit */
1418         if (!root_tag_get(root, tag))
1419                 return 0;
1420 
1421         node = rcu_dereference(root->rnode);
1422         if (!node)
1423                 return 0;
1424 
1425         if (!radix_tree_is_indirect_ptr(node)) {
1426                 if (first_index > 0)
1427                         return 0;
1428                 results[0] = (void **)&root->rnode;
1429                 return 1;
1430         }
1431         node = radix_tree_indirect_to_ptr(node);
1432 
1433         max_index = radix_tree_maxindex(node->height);
1434 
1435         ret = 0;
1436         while (ret < max_items) {
1437                 unsigned int slots_found;
1438                 unsigned long next_index;       /* Index of next search */
1439 
1440                 if (cur_index > max_index)
1441                         break;
1442                 slots_found = __lookup_tag(node, results + ret,
1443                                 cur_index, max_items - ret, &next_index, tag);
1444                 ret += slots_found;
1445                 if (next_index == 0)
1446                         break;
1447                 cur_index = next_index;
1448         }
1449 
1450         return ret;
1451 }
1452 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1453 
1454 
1455 /**
1456  *      radix_tree_shrink    -    shrink height of a radix tree to minimal
1457  *      @root           radix tree root
1458  */
1459 static inline void radix_tree_shrink(struct radix_tree_root *root)
1460 {
1461         /* try to shrink tree height */
1462         while (root->height > 0) {
1463                 struct radix_tree_node *to_free = root->rnode;
1464                 void *newptr;
1465                 int tag;
1466 
1467                 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1468                 to_free = radix_tree_indirect_to_ptr(to_free);
1469 
1470                 /*
1471                  * The candidate node has more than one child, or its child
1472                  * is not at the leftmost slot, we cannot shrink.
1473                  */
1474                 if (to_free->count != 1)
1475                         break;
1476                 if (!to_free->slots[0])
1477                         break;
1478 
1479                 /*
1480                  * We don't need rcu_assign_pointer(), since we are simply
1481                  * moving the node from one part of the tree to another. If
1482                  * it was safe to dereference the old pointer to it
1483                  * (to_free->slots[0]), it will be safe to dereference the new
1484                  * one (root->rnode).
1485                  */
1486                 newptr = to_free->slots[0];
1487                 if (root->height > 1)
1488                         newptr = radix_tree_ptr_to_indirect(newptr);
1489                 root->rnode = newptr;
1490                 root->height--;
1491                 /* must only free zeroed nodes into the slab */
1492                 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1493                         tag_clear(to_free, tag, 0);
1494                 to_free->slots[0] = NULL;
1495                 to_free->count = 0;
1496         }
1497 }
1498 
1499 static
1500 int radix_valid_delete(struct radix_tree_node *node, int offset, int tag)
1501 {
1502         /*
1503          * we need to check for > 2, because nodes with a single child
1504          * can still be deleted, see radix_tree_shrink().
1505          */
1506         int unlock = (node->count > 2);
1507 
1508         if (!unlock)
1509                 return unlock;
1510 
1511         for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1512                 if (!radix_valid_tag_clear(node, offset, tag)) {
1513                         unlock = 0;
1514                         break;
1515                 }
1516         }
1517 
1518         return unlock;
1519 }
1520 
1521 /**
1522  *      radix_tree_delete    -    delete an item from a radix tree
1523  *      @root:          radix tree root
1524  *      @index:         index key
1525  *
1526  *      Remove the item at @index from the radix tree rooted at @root.
1527  *
1528  *      Returns the address of the deleted item, or NULL if it was not present.
1529  */
1530 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1531 {
1532         /*
1533          * The radix tree path needs to be one longer than the maximum path
1534          * since the "list" is null terminated.
1535          */
1536         struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
1537         struct radix_tree_path *punlock = path, *piter;
1538         struct radix_tree_node *slot = NULL;
1539         unsigned int height, shift;
1540         int tag;
1541         int offset;
1542         RADIX_TREE_CONTEXT(context, root);
1543 
1544         slot = radix_optimistic_lock(context, index, 0, radix_valid_delete);
1545         if (slot) {
1546                 height = slot->height;
1547                 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1548                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1549                 pathp->offset = offset;
1550                 pathp->node = slot;
1551                 radix_path_init(context, pathp);
1552                 goto optimistic;
1553         }
1554 
1555         pathp->node = NULL;
1556         radix_path_init(context, pathp);
1557 
1558         height = root->height;
1559         if (index > radix_tree_maxindex(height))
1560                 goto out;
1561 
1562         slot = root->rnode;
1563         if (height == 0) {
1564                 root_tag_clear_all(root);
1565                 root->rnode = NULL;
1566                 goto out;
1567         }
1568         slot = radix_tree_indirect_to_ptr(slot);
1569 
1570         shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1571 
1572         do {
1573                 if (slot == NULL)
1574                         goto out;
1575 
1576                 pathp++;
1577                 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1578                 pathp->offset = offset;
1579                 pathp->node = slot;
1580                 radix_path_lock(context, pathp, slot);
1581 
1582                 if (radix_valid_delete(slot, offset, 0)) {
1583                         for (; punlock < pathp; punlock++)
1584                                 radix_path_unlock(context, punlock);
1585                 }
1586 
1587 optimistic:
1588                 slot = slot->slots[offset];
1589                 shift -= RADIX_TREE_MAP_SHIFT;
1590                 height--;
1591         } while (height > 0);
1592 
1593         if (slot == NULL)
1594                 goto out;
1595 
1596         /*
1597          * Clear all tags associated with the just-deleted item
1598          */
1599         for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1600                 for (piter = pathp; piter >= punlock; piter--) {
1601                         if (piter->node) {
1602                                 if (!tag_get(piter->node, tag, piter->offset))
1603                                         break;
1604                                 tag_clear(piter->node, tag, piter->offset);
1605                                 if (any_tag_set(piter->node, tag))
1606                                         break;
1607                         } else {
1608                                 if (root_tag_get(root, tag))
1609                                         root_tag_clear(root, tag);
1610                         }
1611                 }
1612         }
1613 
1614         /* Now unhook the nodes we do not need anymore */
1615         for (piter = pathp; piter >= punlock && piter->node; piter--) {
1616                 piter->node->slots[piter->offset] = NULL;
1617                 piter->node->count--;
1618 
1619                 if (piter->node->count) {
1620                         if (piter->node ==
1621                                         radix_tree_indirect_to_ptr(root->rnode))
1622                                 radix_tree_shrink(root);
1623                         goto out;
1624                 }
1625         }
1626 
1627         BUG_ON(piter->node);
1628 
1629         root_tag_clear_all(root);
1630         root->height = 0;
1631         root->rnode = NULL;
1632 
1633 out:
1634         for (; punlock <= pathp; punlock++) {
1635                 radix_path_unlock(context, punlock);
1636                 if (punlock->node && punlock->node->count == 0)
1637                         radix_tree_node_free(punlock->node);
1638         }
1639         return slot;
1640 }
1641 EXPORT_SYMBOL(radix_tree_delete);
1642 
1643 /**
1644  *      radix_tree_tagged - test whether any items in the tree are tagged
1645  *      @root:          radix tree root
1646  *      @tag:           tag to test
1647  */
1648 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
1649 {
1650         return root_tag_get(root, tag);
1651 }
1652 EXPORT_SYMBOL(radix_tree_tagged);
1653 
1654 static void
1655 radix_tree_node_ctor(struct kmem_cache *cachep, void *node)
1656 {
1657         memset(node, 0, sizeof(struct radix_tree_node));
1658 }
1659 
1660 static __init unsigned long __maxindex(unsigned int height)
1661 {
1662         unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1663         int shift = RADIX_TREE_INDEX_BITS - width;
1664 
1665         if (shift < 0)
1666                 return ~0UL;
1667         if (shift >= BITS_PER_LONG)
1668                 return 0UL;
1669         return ~0UL >> shift;
1670 }
1671 
1672 static __init void radix_tree_init_maxindex(void)
1673 {
1674         unsigned int i;
1675 
1676         for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1677                 height_to_maxindex[i] = __maxindex(i);
1678 }
1679 
1680 static int radix_tree_callback(struct notifier_block *nfb,
1681                             unsigned long action,
1682                             void *hcpu)
1683 {
1684        int cpu = (long)hcpu;
1685        struct radix_tree_preload *rtp;
1686 
1687        /* Free per-cpu pool of perloaded nodes */
1688        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1689                rtp = &per_cpu(radix_tree_preloads, cpu);
1690                while (rtp->nr) {
1691                        kmem_cache_free(radix_tree_node_cachep,
1692                                        rtp->nodes[rtp->nr-1]);
1693                        rtp->nodes[rtp->nr-1] = NULL;
1694                        rtp->nr--;
1695                }
1696        }
1697        return NOTIFY_OK;
1698 }
1699 
1700 void __init radix_tree_init(void)
1701 {
1702         radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1703                         sizeof(struct radix_tree_node), 0,
1704                         SLAB_PANIC, radix_tree_node_ctor);
1705         radix_tree_init_maxindex();
1706         hotcpu_notifier(radix_tree_callback, 0);
1707 }
1708 
  This page was automatically generated by the LXR engine.