1 /*
2 * Copyright (C) 2001 Momchil Velikov
3 * Portions Copyright (C) 2001 Christoph Hellwig
4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
5 * Copyright (C) 2006 Nick Piggin
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2, or (at
10 * your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include <linux/errno.h>
23 #include <linux/init.h>
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/radix-tree.h>
27 #include <linux/percpu.h>
28 #include <linux/slab.h>
29 #include <linux/notifier.h>
30 #include <linux/cpu.h>
31 #include <linux/gfp.h>
32 #include <linux/string.h>
33 #include <linux/bitops.h>
34 #include <linux/rcupdate.h>
35 #include <linux/spinlock.h>
36
37
38 #ifdef __KERNEL__
39 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
40 #else
41 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
42 #endif
43
44 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
45 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
46
47 #define RADIX_TREE_TAG_LONGS \
48 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
49
50 struct radix_tree_node {
51 unsigned int height; /* Height from the bottom */
52 unsigned int count;
53 struct rcu_head rcu_head;
54 void *slots[RADIX_TREE_MAP_SIZE];
55 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
56 #ifdef CONFIG_RADIX_TREE_CONCURRENT
57 spinlock_t lock;
58 #endif
59 };
60
61 struct radix_tree_path {
62 struct radix_tree_node *node;
63 int offset;
64 #ifdef CONFIG_RADIX_TREE_CONCURRENT
65 spinlock_t *locked;
66 #endif
67 };
68
69 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
70 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
71 RADIX_TREE_MAP_SHIFT))
72
73 /*
74 * The height_to_maxindex array needs to be one deeper than the maximum
75 * path as height 0 holds only 1 entry.
76 */
77 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
78
79 #ifdef CONFIG_RADIX_TREE_CONCURRENT
80 static struct lock_class_key radix_node_class[RADIX_TREE_MAX_PATH + 1];
81 #endif
82 #ifdef CONFIG_DEBUG_LOCK_ALLOC
83 static const char *radix_node_key_string[RADIX_TREE_MAX_PATH + 1] = {
84 "radix-node-00",
85 "radix-node-01",
86 "radix-node-02",
87 "radix-node-03",
88 "radix-node-04",
89 "radix-node-05",
90 "radix-node-06",
91 "radix-node-07",
92 "radix-node-08",
93 "radix-node-09",
94 "radix-node-10",
95 "radix-node-11",
96 "radix-node-12",
97 "radix-node-13",
98 "radix-node-14",
99 "radix-node-15",
100 };
101 #endif
102
103 #ifdef CONFIG_RADIX_TREE_OPTIMISTIC
104 static DEFINE_PER_CPU(unsigned long[RADIX_TREE_MAX_PATH+1], optimistic_histogram);
105
106 static void optimistic_hit(unsigned long height)
107 {
108 unsigned long flags;
109
110 if (height > RADIX_TREE_MAX_PATH)
111 height = RADIX_TREE_MAX_PATH;
112
113 /*
114 * HACK:
115 * On PREEMPT_RT this protects the percpu var.
116 * We really need to fix this to pass in cpu var protected
117 * with a RT sleeping spinlock.
118 */
119 local_irq_save(flags);
120 __get_cpu_var(optimistic_histogram)[height]++;
121 local_irq_restore(flags);
122 }
123
124 #ifdef CONFIG_PROC_FS
125
126 #include <linux/seq_file.h>
127 #include <linux/uaccess.h>
128
129 static void *frag_start(struct seq_file *m, loff_t *pos)
130 {
131 if (*pos < 0 || *pos > RADIX_TREE_MAX_PATH)
132 return NULL;
133
134 m->private = (void *)(unsigned long)*pos;
135 return pos;
136 }
137
138 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
139 {
140 if (*pos < RADIX_TREE_MAX_PATH) {
141 (*pos)++;
142 (*((unsigned long *)&m->private))++;
143 return pos;
144 }
145 return NULL;
146 }
147
148 static void frag_stop(struct seq_file *m, void *arg)
149 {
150 }
151
152 unsigned long get_optimistic_stat(unsigned long index)
153 {
154 unsigned long total = 0;
155 int cpu;
156
157 for_each_possible_cpu(cpu) {
158 total += per_cpu(optimistic_histogram, cpu)[index];
159 }
160 return total;
161 }
162
163 static int frag_show(struct seq_file *m, void *arg)
164 {
165 unsigned long index = (unsigned long)m->private;
166 unsigned long hits = get_optimistic_stat(index);
167
168 if (index == 0)
169 seq_printf(m, "levels skipped\thits\n");
170
171 if (index < RADIX_TREE_MAX_PATH)
172 seq_printf(m, "%9lu\t%9lu\n", index, hits);
173 else
174 seq_printf(m, "failed\t%9lu\n", hits);
175
176 return 0;
177 }
178
179 struct seq_operations optimistic_op = {
180 .start = frag_start,
181 .next = frag_next,
182 .stop = frag_stop,
183 .show = frag_show,
184 };
185
186 static void optimistic_reset(void)
187 {
188 int cpu;
189 int height;
190 for_each_possible_cpu(cpu) {
191 for (height = 0; height <= RADIX_TREE_MAX_PATH; height++)
192 per_cpu(optimistic_histogram, cpu)[height] = 0;
193 }
194 }
195
196 ssize_t optimistic_write(struct file *file, const char __user *buf,
197 size_t count, loff_t *ppos)
198 {
199 if (count) {
200 char c;
201 if (get_user(c, buf))
202 return -EFAULT;
203 if (c == '')
204 optimistic_reset();
205 }
206 return count;
207 }
208
209 #endif // CONFIG_PROC_FS
210 #endif // CONFIG_RADIX_TREE_OPTIMISTIC
211
212 /*
213 * Radix tree node cache.
214 */
215 static struct kmem_cache *radix_tree_node_cachep;
216
217 /*
218 * Per-cpu pool of preloaded nodes
219 */
220 struct radix_tree_preload {
221 int nr;
222 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
223 };
224 DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
225
226 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
227 {
228 return root->gfp_mask & __GFP_BITS_MASK;
229 }
230
231 /*
232 * This assumes that the caller has performed appropriate preallocation, and
233 * that the caller has pinned this thread of control to the current CPU.
234 */
235 static struct radix_tree_node *
236 radix_tree_node_alloc(struct radix_tree_root *root, int height)
237 {
238 struct radix_tree_node *ret = NULL;
239 gfp_t gfp_mask = root_gfp_mask(root);
240
241 if (!(gfp_mask & __GFP_WAIT)) {
242 struct radix_tree_preload *rtp;
243
244 /*
245 * Provided the caller has preloaded here, we will always
246 * succeed in getting a node here (and never reach
247 * kmem_cache_alloc)
248 */
249 rtp = &get_cpu_var(radix_tree_preloads);
250 if (rtp->nr) {
251 ret = rtp->nodes[rtp->nr - 1];
252 rtp->nodes[rtp->nr - 1] = NULL;
253 rtp->nr--;
254 }
255 put_cpu_var(radix_tree_preloads);
256 }
257 if (ret == NULL)
258 ret = kmem_cache_alloc(radix_tree_node_cachep,
259 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
260
261 BUG_ON(radix_tree_is_indirect_ptr(ret));
262 #ifdef CONFIG_RADIX_TREE_CONCURRENT
263 spin_lock_init(&ret->lock);
264 lockdep_set_class_and_name(&ret->lock,
265 &radix_node_class[height],
266 radix_node_key_string[height]);
267 #endif
268 ret->height = height;
269 return ret;
270 }
271
272 static void radix_tree_node_rcu_free(struct rcu_head *head)
273 {
274 struct radix_tree_node *node =
275 container_of(head, struct radix_tree_node, rcu_head);
276 kmem_cache_free(radix_tree_node_cachep, node);
277 }
278
279 static inline void
280 radix_tree_node_free(struct radix_tree_node *node)
281 {
282 call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
283 }
284
285 #ifndef CONFIG_PREEMPT_RT
286
287 /*
288 * Load up this CPU's radix_tree_node buffer with sufficient objects to
289 * ensure that the addition of a single element in the tree cannot fail. On
290 * success, return zero, with preemption disabled. On error, return -ENOMEM
291 * with preemption not disabled.
292 */
293 int radix_tree_preload(gfp_t gfp_mask)
294 {
295 struct radix_tree_preload *rtp;
296 struct radix_tree_node *node;
297 int ret = -ENOMEM;
298
299 preempt_disable();
300 rtp = &__get_cpu_var(radix_tree_preloads);
301 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
302 preempt_enable();
303 node = kmem_cache_alloc(radix_tree_node_cachep,
304 set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
305 if (node == NULL)
306 goto out;
307 preempt_disable();
308 rtp = &__get_cpu_var(radix_tree_preloads);
309 if (rtp->nr < ARRAY_SIZE(rtp->nodes))
310 rtp->nodes[rtp->nr++] = node;
311 else
312 kmem_cache_free(radix_tree_node_cachep, node);
313 }
314 ret = 0;
315 out:
316 return ret;
317 }
318 EXPORT_SYMBOL(radix_tree_preload);
319
320 #endif
321
322 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
323 int offset)
324 {
325 __set_bit(offset, node->tags[tag]);
326 }
327
328 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
329 int offset)
330 {
331 __clear_bit(offset, node->tags[tag]);
332 }
333
334 static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
335 int offset)
336 {
337 return test_bit(offset, node->tags[tag]);
338 }
339
340 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
341 {
342 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
343 }
344
345
346 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
347 {
348 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
349 }
350
351 static inline void root_tag_clear_all(struct radix_tree_root *root)
352 {
353 root->gfp_mask &= __GFP_BITS_MASK;
354 }
355
356 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
357 {
358 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
359 }
360
361 /*
362 * Returns 1 if any slot in the node has this tag set.
363 * Otherwise returns 0.
364 */
365 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
366 {
367 int idx;
368 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
369 if (node->tags[tag][idx])
370 return 1;
371 }
372 return 0;
373 }
374
375 static inline int any_tag_set_but(struct radix_tree_node *node,
376 unsigned int tag, int offset)
377 {
378 int idx;
379 int offset_idx = offset / BITS_PER_LONG;
380 unsigned long offset_mask = ~(1UL << (offset % BITS_PER_LONG));
381 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
382 unsigned long mask = ~0UL;
383 if (idx == offset_idx)
384 mask = offset_mask;
385 if (node->tags[tag][idx] & mask)
386 return 1;
387 }
388 return 0;
389 }
390
391 /*
392 * Return the maximum key which can be store into a
393 * radix tree with height HEIGHT.
394 */
395 static inline unsigned long radix_tree_maxindex(unsigned int height)
396 {
397 return height_to_maxindex[height];
398 }
399
400 /*
401 * Extend a radix tree so it can store key @index.
402 */
403 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
404 {
405 struct radix_tree_node *node;
406 unsigned int height;
407 int tag;
408
409 /* Figure out what the height should be. */
410 height = root->height + 1;
411 while (index > radix_tree_maxindex(height))
412 height++;
413
414 if (root->rnode == NULL) {
415 root->height = height;
416 goto out;
417 }
418
419 do {
420 unsigned int newheight = root->height + 1;
421 if (!(node = radix_tree_node_alloc(root, newheight)))
422 return -ENOMEM;
423
424 /* Increase the height. */
425 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
426
427 /* Propagate the aggregated tag info into the new root */
428 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
429 if (root_tag_get(root, tag))
430 tag_set(node, tag, 0);
431 }
432
433 node->count = 1;
434 node = radix_tree_ptr_to_indirect(node);
435 rcu_assign_pointer(root->rnode, node);
436 root->height = newheight;
437 } while (height > root->height);
438 out:
439 return 0;
440 }
441
442 #ifdef CONFIG_RADIX_TREE_CONCURRENT
443 static inline struct radix_tree_context *
444 radix_tree_get_context(struct radix_tree_root **rootp)
445 {
446 struct radix_tree_context *context = NULL;
447 unsigned long addr = (unsigned long)*rootp;
448
449 if (addr & 1) {
450 context = (struct radix_tree_context *)(addr - 1);
451 *rootp = context->root;
452 }
453
454 return context;
455 }
456
457 #define RADIX_TREE_CONTEXT(context, root) \
458 struct radix_tree_context *context = \
459 radix_tree_get_context(&root)
460
461 static inline spinlock_t *radix_node_lock(struct radix_tree_root *root,
462 struct radix_tree_node *node)
463 {
464 spinlock_t *locked = &node->lock;
465 spin_lock(locked);
466 return locked;
467 }
468
469 static inline void radix_ladder_lock(struct radix_tree_context *context,
470 struct radix_tree_node *node)
471 {
472 if (context) {
473 struct radix_tree_root *root = context->root;
474 spinlock_t *locked = radix_node_lock(root, node);
475 if (locked) {
476 spin_unlock(context->locked);
477 context->locked = locked;
478 }
479 }
480 }
481
482 static inline void radix_path_init(struct radix_tree_context *context,
483 struct radix_tree_path *pathp)
484 {
485 pathp->locked = context ? context->locked : NULL;
486 }
487
488 static inline void radix_path_lock(struct radix_tree_context *context,
489 struct radix_tree_path *pathp, struct radix_tree_node *node)
490 {
491 if (context) {
492 struct radix_tree_root *root = context->root;
493 spinlock_t *locked = radix_node_lock(root, node);
494 if (locked)
495 context->locked = locked;
496 pathp->locked = locked;
497 } else
498 pathp->locked = NULL;
499 }
500
501 static inline void radix_path_unlock(struct radix_tree_context *context,
502 struct radix_tree_path *punlock)
503 {
504 if (context && punlock->locked &&
505 context->locked != punlock->locked)
506 spin_unlock(punlock->locked);
507 }
508 #else
509 #define RADIX_TREE_CONTEXT(context, root) do { } while (0)
510 #define radix_ladder_lock(context, node) do { } while (0)
511 #define radix_path_init(context, pathp) do { } while (0)
512 #define radix_path_lock(context, pathp, node) do { } while (0)
513 #define radix_path_unlock(context, punlock) do { } while (0)
514 #endif
515
516 #ifdef CONFIG_RADIX_TREE_OPTIMISTIC
517 typedef int (*radix_valid_fn)(struct radix_tree_node *, int, int);
518
519 static struct radix_tree_node *
520 radix_optimistic_lookup(struct radix_tree_context *context, unsigned long index,
521 int tag, radix_valid_fn valid)
522 {
523 unsigned int height, shift;
524 struct radix_tree_node *node, *ret = NULL, **slot;
525 struct radix_tree_root *root = context->root;
526
527 node = rcu_dereference(root->rnode);
528 if (node == NULL)
529 return NULL;
530
531 if (!radix_tree_is_indirect_ptr(node))
532 return NULL;
533
534 node = radix_tree_indirect_to_ptr(node);
535
536 height = node->height;
537 if (index > radix_tree_maxindex(height))
538 return NULL;
539
540 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
541 do {
542 int offset = (index >> shift) & RADIX_TREE_MAP_MASK;
543 if ((*valid)(node, offset, tag))
544 ret = node;
545 slot = (struct radix_tree_node **)(node->slots + offset);
546 node = rcu_dereference(*slot);
547 if (!node)
548 break;
549
550 shift -= RADIX_TREE_MAP_SHIFT;
551 height--;
552 } while (height > 0);
553
554 return ret;
555 }
556
557 static struct radix_tree_node *
558 __radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
559 int tag, radix_valid_fn valid)
560 {
561 struct radix_tree_node *node;
562 spinlock_t *locked;
563 unsigned int shift, offset;
564
565 node = radix_optimistic_lookup(context, index, tag, valid);
566 if (!node)
567 goto out;
568
569 locked = radix_node_lock(context->root, node);
570 if (!locked)
571 goto out;
572
573 #if 0
574 if (node != radix_optimistic_lookup(context, index, tag, valid))
575 goto out_unlock;
576 #else
577 /* check if the node got freed */
578 if (!node->count)
579 goto out_unlock;
580
581 /* check if the node is still a valid termination point */
582 shift = (node->height - 1) * RADIX_TREE_MAP_SHIFT;
583 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
584 if (!(*valid)(node, offset, tag))
585 goto out_unlock;
586 #endif
587
588 context->locked = locked;
589 return node;
590
591 out_unlock:
592 spin_unlock(locked);
593 out:
594 return NULL;
595 }
596
597 static struct radix_tree_node *
598 radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
599 int tag, radix_valid_fn valid)
600 {
601 struct radix_tree_node *node = NULL;
602
603 if (context) {
604 node = __radix_optimistic_lock(context, index, tag, valid);
605 if (!node) {
606 BUG_ON(context->locked);
607 spin_lock(&context->root->lock);
608 context->locked = &context->root->lock;
609 optimistic_hit(RADIX_TREE_MAX_PATH);
610 } else
611 optimistic_hit(context->root->height - node->height);
612 }
613 return node;
614 }
615
616 static int radix_valid_always(struct radix_tree_node *node, int offset, int tag)
617 {
618 return 1;
619 }
620
621 static int radix_valid_tag(struct radix_tree_node *node, int offset, int tag)
622 {
623 return tag_get(node, tag, offset);
624 }
625 #else
626 #define radix_optimistic_lock(context, index, tag, valid) NULL
627 #endif
628
629 /**
630 * radix_tree_insert - insert into a radix tree
631 * @root: radix tree root
632 * @index: index key
633 * @item: item to insert
634 *
635 * Insert an item into the radix tree at position @index.
636 */
637 int radix_tree_insert(struct radix_tree_root *root,
638 unsigned long index, void *item)
639 {
640 struct radix_tree_node *node = NULL, *slot;
641 unsigned int height, shift;
642 int offset;
643 int error;
644 int tag;
645 RADIX_TREE_CONTEXT(context, root);
646
647 BUG_ON(radix_tree_is_indirect_ptr(item));
648
649 node = radix_optimistic_lock(context, index, 0, radix_valid_always);
650 if (node) {
651 height = node->height;
652 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
653 goto optimistic;
654 }
655
656 /* Make sure the tree is high enough. */
657 if (index > radix_tree_maxindex(root->height)) {
658 error = radix_tree_extend(root, index);
659 if (error)
660 return error;
661 }
662
663 slot = radix_tree_indirect_to_ptr(root->rnode);
664 height = root->height;
665 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
666
667 offset = 0; /* uninitialised var warning */
668 while (height > 0) {
669 if (slot == NULL) {
670 /* Have to add a child node. */
671 if (!(slot = radix_tree_node_alloc(root, height)))
672 return -ENOMEM;
673 if (node) {
674 rcu_assign_pointer(node->slots[offset], slot);
675 node->count++;
676 } else
677 rcu_assign_pointer(root->rnode,
678 radix_tree_ptr_to_indirect(slot));
679 }
680
681 /* Go a level down */
682 node = slot;
683 radix_ladder_lock(context, node);
684
685 optimistic:
686 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
687 slot = node->slots[offset];
688 shift -= RADIX_TREE_MAP_SHIFT;
689 height--;
690 }
691
692 if (slot != NULL)
693 return -EEXIST;
694
695 if (node) {
696 node->count++;
697 rcu_assign_pointer(node->slots[offset], item);
698 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
699 BUG_ON(tag_get(node, tag, offset));
700 } else {
701 rcu_assign_pointer(root->rnode, item);
702 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
703 BUG_ON(root_tag_get(root, tag));
704 }
705
706 return 0;
707 }
708 EXPORT_SYMBOL(radix_tree_insert);
709
710 /**
711 * radix_tree_lookup_slot - lookup a slot in a radix tree
712 * @root: radix tree root
713 * @index: index key
714 *
715 * Returns: the slot corresponding to the position @index in the
716 * radix tree @root. This is useful for update-if-exists operations.
717 *
718 * This function can be called under rcu_read_lock iff the slot is not
719 * modified by radix_tree_replace_slot, otherwise it must be called
720 * exclusive from other writers. Any dereference of the slot must be done
721 * using radix_tree_deref_slot.
722 */
723 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
724 {
725 unsigned int height, shift;
726 struct radix_tree_node *node, **slot;
727 RADIX_TREE_CONTEXT(context, root);
728
729 node = radix_optimistic_lock(context, index, 0, radix_valid_always);
730 if (node)
731 goto optimistic;
732
733 node = rcu_dereference(root->rnode);
734 if (node == NULL)
735 return NULL;
736
737 if (!radix_tree_is_indirect_ptr(node)) {
738 if (index > 0)
739 return NULL;
740 return (void **)&root->rnode;
741 }
742 node = radix_tree_indirect_to_ptr(node);
743
744 optimistic:
745 height = node->height;
746 if (index > radix_tree_maxindex(height))
747 return NULL;
748
749 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
750
751 do {
752 slot = (struct radix_tree_node **)
753 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
754 node = rcu_dereference(*slot);
755 if (node == NULL)
756 return NULL;
757
758 radix_ladder_lock(context, node);
759
760 shift -= RADIX_TREE_MAP_SHIFT;
761 height--;
762 } while (height > 0);
763
764 return (void **)slot;
765 }
766 EXPORT_SYMBOL(radix_tree_lookup_slot);
767
768 /**
769 * radix_tree_lookup - perform lookup operation on a radix tree
770 * @root: radix tree root
771 * @index: index key
772 *
773 * Lookup the item at the position @index in the radix tree @root.
774 *
775 * This function can be called under rcu_read_lock, however the caller
776 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
777 * them safely). No RCU barriers are required to access or modify the
778 * returned item, however.
779 */
780 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
781 {
782 unsigned int height, shift;
783 struct radix_tree_node *node, **slot;
784
785 node = rcu_dereference(root->rnode);
786 if (node == NULL)
787 return NULL;
788
789 if (!radix_tree_is_indirect_ptr(node)) {
790 if (index > 0)
791 return NULL;
792 return node;
793 }
794 node = radix_tree_indirect_to_ptr(node);
795
796 height = node->height;
797 if (index > radix_tree_maxindex(height))
798 return NULL;
799
800 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
801
802 do {
803 slot = (struct radix_tree_node **)
804 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
805 node = rcu_dereference(*slot);
806 if (node == NULL)
807 return NULL;
808
809 shift -= RADIX_TREE_MAP_SHIFT;
810 height--;
811 } while (height > 0);
812
813 return node;
814 }
815 EXPORT_SYMBOL(radix_tree_lookup);
816
817 /**
818 * radix_tree_tag_set - set a tag on a radix tree node
819 * @root: radix tree root
820 * @index: index key
821 * @tag: tag index
822 *
823 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
824 * corresponding to @index in the radix tree. From
825 * the root all the way down to the leaf node.
826 *
827 * Returns the address of the tagged item. Setting a tag on a not-present
828 * item is a bug.
829 */
830 void *radix_tree_tag_set(struct radix_tree_root *root,
831 unsigned long index, unsigned int tag)
832 {
833 unsigned int height, shift;
834 struct radix_tree_node *slot;
835 RADIX_TREE_CONTEXT(context, root);
836
837 slot = radix_optimistic_lock(context, index, tag, radix_valid_tag);
838 if (slot) {
839 height = slot->height;
840 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
841 goto optimistic;
842 }
843
844 height = root->height;
845 BUG_ON(index > radix_tree_maxindex(height));
846
847 slot = radix_tree_indirect_to_ptr(root->rnode);
848 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
849
850 /* set the root's tag bit */
851 if (slot && !root_tag_get(root, tag))
852 root_tag_set(root, tag);
853
854 while (height > 0) {
855 int offset;
856
857 radix_ladder_lock(context, slot);
858
859 optimistic:
860 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
861 if (!tag_get(slot, tag, offset))
862 tag_set(slot, tag, offset);
863 slot = slot->slots[offset];
864 BUG_ON(slot == NULL);
865 shift -= RADIX_TREE_MAP_SHIFT;
866 height--;
867 }
868
869 return slot;
870 }
871 EXPORT_SYMBOL(radix_tree_tag_set);
872
873 /*
874 * the change can never propagate upwards from here.
875 */
876 static
877 int radix_valid_tag_clear(struct radix_tree_node *node, int offset, int tag)
878 {
879 int this, other;
880
881 this = tag_get(node, tag, offset);
882 other = any_tag_set_but(node, tag, offset);
883
884 return !this || other;
885 }
886
887 /**
888 * radix_tree_tag_clear - clear a tag on a radix tree node
889 * @root: radix tree root
890 * @index: index key
891 * @tag: tag index
892 *
893 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
894 * corresponding to @index in the radix tree. If
895 * this causes the leaf node to have no tags set then clear the tag in the
896 * next-to-leaf node, etc.
897 *
898 * Returns the address of the tagged item on success, else NULL. ie:
899 * has the same return value and semantics as radix_tree_lookup().
900 */
901 void *radix_tree_tag_clear(struct radix_tree_root *root,
902 unsigned long index, unsigned int tag)
903 {
904 /*
905 * The radix tree path needs to be one longer than the maximum path
906 * since the "list" is null terminated.
907 */
908 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
909 struct radix_tree_path *punlock = path, *piter;
910 struct radix_tree_node *slot = NULL;
911 unsigned int height, shift, offset;
912
913 RADIX_TREE_CONTEXT(context, root);
914
915 slot = radix_optimistic_lock(context, index, tag,
916 radix_valid_tag_clear);
917 if (slot) {
918 height = slot->height;
919 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
920 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
921 pathp->offset = offset;
922 pathp->node = slot;
923 radix_path_init(context, pathp);
924 goto optimistic;
925 }
926
927 pathp->node = NULL;
928 radix_path_init(context, pathp);
929
930 height = root->height;
931 if (index > radix_tree_maxindex(height))
932 goto out;
933
934 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
935 slot = radix_tree_indirect_to_ptr(root->rnode);
936
937 while (height > 0) {
938 if (slot == NULL)
939 goto out;
940
941 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
942 pathp++;
943 pathp->offset = offset;
944 pathp->node = slot;
945 radix_path_lock(context, pathp, slot);
946
947 if (radix_valid_tag_clear(slot, offset, tag)) {
948 for (; punlock < pathp; punlock++)
949 radix_path_unlock(context, punlock);
950 }
951
952 optimistic:
953 slot = slot->slots[offset];
954 shift -= RADIX_TREE_MAP_SHIFT;
955 height--;
956 }
957
958 if (slot == NULL)
959 goto out;
960
961 for (piter = pathp; piter >= punlock; piter--) {
962 if (piter->node) {
963 if (!tag_get(piter->node, tag, piter->offset))
964 break;
965 tag_clear(piter->node, tag, piter->offset);
966 if (any_tag_set(piter->node, tag))
967 break;
968 } else {
969 if (root_tag_get(root, tag))
970 root_tag_clear(root, tag);
971 }
972 }
973
974 out:
975 for (; punlock < pathp; punlock++)
976 radix_path_unlock(context, punlock);
977 return slot;
978 }
979 EXPORT_SYMBOL(radix_tree_tag_clear);
980
981 #ifndef __KERNEL__ /* Only the test harness uses this at present */
982 /**
983 * radix_tree_tag_get - get a tag on a radix tree node
984 * @root: radix tree root
985 * @index: index key
986 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
987 *
988 * Return values:
989 *
990 * 0: tag not present or not set
991 * 1: tag set
992 */
993 int radix_tree_tag_get(struct radix_tree_root *root,
994 unsigned long index, unsigned int tag)
995 {
996 unsigned int height, shift;
997 struct radix_tree_node *node;
998 int saw_unset_tag = 0;
999
1000 /* check the root's tag bit */
1001 if (!root_tag_get(root, tag))
1002 return 0;
1003
1004 node = rcu_dereference(root->rnode);
1005 if (node == NULL)
1006 return 0;
1007
1008 if (!radix_tree_is_indirect_ptr(node))
1009 return (index == 0);
1010 node = radix_tree_indirect_to_ptr(node);
1011
1012 height = node->height;
1013 if (index > radix_tree_maxindex(height))
1014 return 0;
1015
1016 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1017
1018 for ( ; ; ) {
1019 int offset;
1020
1021 if (node == NULL)
1022 return 0;
1023
1024 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1025
1026 /*
1027 * This is just a debug check. Later, we can bale as soon as
1028 * we see an unset tag.
1029 */
1030 if (!tag_get(node, tag, offset))
1031 saw_unset_tag = 1;
1032 if (height == 1) {
1033 int ret = tag_get(node, tag, offset);
1034
1035 BUG_ON(ret && saw_unset_tag);
1036 return !!ret;
1037 }
1038 node = rcu_dereference(node->slots[offset]);
1039 shift -= RADIX_TREE_MAP_SHIFT;
1040 height--;
1041 }
1042 }
1043 EXPORT_SYMBOL(radix_tree_tag_get);
1044 #endif
1045
1046 /**
1047 * radix_tree_next_hole - find the next hole (not-present entry)
1048 * @root: tree root
1049 * @index: index key
1050 * @max_scan: maximum range to search
1051 *
1052 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
1053 * indexed hole.
1054 *
1055 * Returns: the index of the hole if found, otherwise returns an index
1056 * outside of the set specified (in which case 'return - index >= max_scan'
1057 * will be true).
1058 *
1059 * radix_tree_next_hole may be called under rcu_read_lock. However, like
1060 * radix_tree_gang_lookup, this will not atomically search a snapshot of the
1061 * tree at a single point in time. For example, if a hole is created at index
1062 * 5, then subsequently a hole is created at index 10, radix_tree_next_hole
1063 * covering both indexes may return 10 if called under rcu_read_lock.
1064 */
1065 unsigned long radix_tree_next_hole(struct radix_tree_root *root,
1066 unsigned long index, unsigned long max_scan)
1067 {
1068 unsigned long i;
1069
1070 for (i = 0; i < max_scan; i++) {
1071 if (!radix_tree_lookup(root, index))
1072 break;
1073 index++;
1074 if (index == 0)
1075 break;
1076 }
1077
1078 return index;
1079 }
1080 EXPORT_SYMBOL(radix_tree_next_hole);
1081
1082 static unsigned int
1083 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
1084 unsigned int max_items, unsigned long *next_index)
1085 {
1086 unsigned int nr_found = 0;
1087 unsigned int shift, height;
1088 unsigned long i;
1089
1090 height = slot->height;
1091 if (height == 0)
1092 goto out;
1093 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1094
1095 for ( ; height > 1; height--) {
1096 i = (index >> shift) & RADIX_TREE_MAP_MASK;
1097 for (;;) {
1098 if (slot->slots[i] != NULL)
1099 break;
1100 index &= ~((1UL << shift) - 1);
1101 index += 1UL << shift;
1102 if (index == 0)
1103 goto out; /* 32-bit wraparound */
1104 i++;
1105 if (i == RADIX_TREE_MAP_SIZE)
1106 goto out;
1107 }
1108
1109 shift -= RADIX_TREE_MAP_SHIFT;
1110 slot = rcu_dereference(slot->slots[i]);
1111 if (slot == NULL)
1112 goto out;
1113 }
1114
1115 /* Bottom level: grab some items */
1116 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
1117 index++;
1118 if (slot->slots[i]) {
1119 results[nr_found++] = &(slot->slots[i]);
1120 if (nr_found == max_items)
1121 goto out;
1122 }
1123 }
1124 out:
1125 *next_index = index;
1126 return nr_found;
1127 }
1128
1129 /**
1130 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
1131 * @root: radix tree root
1132 * @results: where the results of the lookup are placed
1133 * @first_index: start the lookup from this key
1134 * @max_items: place up to this many items at *results
1135 *
1136 * Performs an index-ascending scan of the tree for present items. Places
1137 * them at *@results and returns the number of items which were placed at
1138 * *@results.
1139 *
1140 * The implementation is naive.
1141 *
1142 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
1143 * rcu_read_lock. In this case, rather than the returned results being
1144 * an atomic snapshot of the tree at a single point in time, the semantics
1145 * of an RCU protected gang lookup are as though multiple radix_tree_lookups
1146 * have been issued in individual locks, and results stored in 'results'.
1147 */
1148 unsigned int
1149 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
1150 unsigned long first_index, unsigned int max_items)
1151 {
1152 unsigned long max_index;
1153 struct radix_tree_node *node;
1154 unsigned long cur_index = first_index;
1155 unsigned int ret;
1156
1157 node = rcu_dereference(root->rnode);
1158 if (!node)
1159 return 0;
1160
1161 if (!radix_tree_is_indirect_ptr(node)) {
1162 if (first_index > 0)
1163 return 0;
1164 results[0] = node;
1165 return 1;
1166 }
1167 node = radix_tree_indirect_to_ptr(node);
1168
1169 max_index = radix_tree_maxindex(node->height);
1170
1171 ret = 0;
1172 while (ret < max_items) {
1173 unsigned int nr_found, slots_found, i;
1174 unsigned long next_index; /* Index of next search */
1175
1176 if (cur_index > max_index)
1177 break;
1178 slots_found = __lookup(node, (void ***)results + ret, cur_index,
1179 max_items - ret, &next_index);
1180 nr_found = 0;
1181 for (i = 0; i < slots_found; i++) {
1182 struct radix_tree_node *slot;
1183 slot = *(((void ***)results)[ret + i]);
1184 if (!slot)
1185 continue;
1186 results[ret + nr_found] = rcu_dereference(slot);
1187 nr_found++;
1188 }
1189 ret += nr_found;
1190 if (next_index == 0)
1191 break;
1192 cur_index = next_index;
1193 }
1194
1195 return ret;
1196 }
1197 EXPORT_SYMBOL(radix_tree_gang_lookup);
1198
1199 /**
1200 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
1201 * @root: radix tree root
1202 * @results: where the results of the lookup are placed
1203 * @first_index: start the lookup from this key
1204 * @max_items: place up to this many items at *results
1205 *
1206 * Performs an index-ascending scan of the tree for present items. Places
1207 * their slots at *@results and returns the number of items which were
1208 * placed at *@results.
1209 *
1210 * The implementation is naive.
1211 *
1212 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
1213 * be dereferenced with radix_tree_deref_slot, and if using only RCU
1214 * protection, radix_tree_deref_slot may fail requiring a retry.
1215 */
1216 unsigned int
1217 radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
1218 unsigned long first_index, unsigned int max_items)
1219 {
1220 unsigned long max_index;
1221 struct radix_tree_node *node;
1222 unsigned long cur_index = first_index;
1223 unsigned int ret;
1224
1225 node = rcu_dereference(root->rnode);
1226 if (!node)
1227 return 0;
1228
1229 if (!radix_tree_is_indirect_ptr(node)) {
1230 if (first_index > 0)
1231 return 0;
1232 results[0] = (void **)&root->rnode;
1233 return 1;
1234 }
1235 node = radix_tree_indirect_to_ptr(node);
1236
1237 max_index = radix_tree_maxindex(node->height);
1238
1239 ret = 0;
1240 while (ret < max_items) {
1241 unsigned int slots_found;
1242 unsigned long next_index; /* Index of next search */
1243
1244 if (cur_index > max_index)
1245 break;
1246 slots_found = __lookup(node, results + ret, cur_index,
1247 max_items - ret, &next_index);
1248 ret += slots_found;
1249 if (next_index == 0)
1250 break;
1251 cur_index = next_index;
1252 }
1253
1254 return ret;
1255 }
1256 EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
1257
1258 /*
1259 * FIXME: the two tag_get()s here should use find_next_bit() instead of
1260 * open-coding the search.
1261 */
1262 static unsigned int
1263 __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
1264 unsigned int max_items, unsigned long *next_index, unsigned int tag)
1265 {
1266 unsigned int nr_found = 0;
1267 unsigned int shift, height;
1268
1269 height = slot->height;
1270 if (height == 0)
1271 goto out;
1272 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1273
1274 while (height > 0) {
1275 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
1276
1277 for (;;) {
1278 if (tag_get(slot, tag, i))
1279 break;
1280 index &= ~((1UL << shift) - 1);
1281 index += 1UL << shift;
1282 if (index == 0)
1283 goto out; /* 32-bit wraparound */
1284 i++;
1285 if (i == RADIX_TREE_MAP_SIZE)
1286 goto out;
1287 }
1288 height--;
1289 if (height == 0) { /* Bottom level: grab some items */
1290 unsigned long j = index & RADIX_TREE_MAP_MASK;
1291
1292 for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
1293 struct radix_tree_node *node;
1294 index++;
1295 if (!tag_get(slot, tag, j))
1296 continue;
1297 node = slot->slots[j];
1298 /*
1299 * Even though the tag was found set, we need to
1300 * recheck that we have a non-NULL node, because
1301 * if this lookup is lockless, it may have been
1302 * subsequently deleted.
1303 *
1304 * Similar care must be taken in any place that
1305 * lookup ->slots[x] without a lock (ie. can't
1306 * rely on its value remaining the same).
1307 */
1308 if (slot->slots[j]) {
1309 results[nr_found++] = &slot->slots[j];
1310 if (nr_found == max_items)
1311 goto out;
1312 }
1313 }
1314 }
1315 shift -= RADIX_TREE_MAP_SHIFT;
1316 slot = rcu_dereference(slot->slots[i]);
1317 if (slot == NULL)
1318 break;
1319 }
1320 out:
1321 *next_index = index;
1322 return nr_found;
1323 }
1324
1325 /**
1326 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
1327 * based on a tag
1328 * @root: radix tree root
1329 * @results: where the results of the lookup are placed
1330 * @first_index: start the lookup from this key
1331 * @max_items: place up to this many items at *results
1332 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
1333 *
1334 * Performs an index-ascending scan of the tree for present items which
1335 * have the tag indexed by @tag set. Places the items at *@results and
1336 * returns the number of items which were placed at *@results.
1337 */
1338 unsigned int
1339 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1340 unsigned long first_index, unsigned int max_items,
1341 unsigned int tag)
1342 {
1343 struct radix_tree_node *node;
1344 unsigned long max_index;
1345 unsigned long cur_index = first_index;
1346 unsigned int ret;
1347
1348 /* check the root's tag bit */
1349 if (!root_tag_get(root, tag))
1350 return 0;
1351
1352 node = rcu_dereference(root->rnode);
1353 if (!node)
1354 return 0;
1355
1356 if (!radix_tree_is_indirect_ptr(node)) {
1357 if (first_index > 0)
1358 return 0;
1359 results[0] = node;
1360 return 1;
1361 }
1362 node = radix_tree_indirect_to_ptr(node);
1363
1364 max_index = radix_tree_maxindex(node->height);
1365
1366 ret = 0;
1367 while (ret < max_items) {
1368 unsigned int slots_found, nr_found, i;
1369 unsigned long next_index; /* Index of next search */
1370
1371 if (cur_index > max_index)
1372 break;
1373 slots_found = __lookup_tag(node, (void ***)results + ret,
1374 cur_index, max_items - ret, &next_index, tag);
1375 nr_found = 0;
1376 for (i = 0; i < slots_found; i++) {
1377 struct radix_tree_node *slot;
1378 slot = *((void ***)results)[ret + i];
1379 if (!slot)
1380 continue;
1381 results[ret + nr_found] = rcu_dereference(slot);
1382 nr_found++;
1383 }
1384 ret += nr_found;
1385 if (next_index == 0)
1386 break;
1387 cur_index = next_index;
1388 }
1389
1390 return ret;
1391 }
1392 EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
1393
1394 /**
1395 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
1396 * radix tree based on a tag
1397 * @root: radix tree root
1398 * @results: where the results of the lookup are placed
1399 * @first_index: start the lookup from this key
1400 * @max_items: place up to this many items at *results
1401 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
1402 *
1403 * Performs an index-ascending scan of the tree for present items which
1404 * have the tag indexed by @tag set. Places the slots at *@results and
1405 * returns the number of slots which were placed at *@results.
1406 */
1407 unsigned int
1408 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1409 unsigned long first_index, unsigned int max_items,
1410 unsigned int tag)
1411 {
1412 struct radix_tree_node *node;
1413 unsigned long max_index;
1414 unsigned long cur_index = first_index;
1415 unsigned int ret;
1416
1417 /* check the root's tag bit */
1418 if (!root_tag_get(root, tag))
1419 return 0;
1420
1421 node = rcu_dereference(root->rnode);
1422 if (!node)
1423 return 0;
1424
1425 if (!radix_tree_is_indirect_ptr(node)) {
1426 if (first_index > 0)
1427 return 0;
1428 results[0] = (void **)&root->rnode;
1429 return 1;
1430 }
1431 node = radix_tree_indirect_to_ptr(node);
1432
1433 max_index = radix_tree_maxindex(node->height);
1434
1435 ret = 0;
1436 while (ret < max_items) {
1437 unsigned int slots_found;
1438 unsigned long next_index; /* Index of next search */
1439
1440 if (cur_index > max_index)
1441 break;
1442 slots_found = __lookup_tag(node, results + ret,
1443 cur_index, max_items - ret, &next_index, tag);
1444 ret += slots_found;
1445 if (next_index == 0)
1446 break;
1447 cur_index = next_index;
1448 }
1449
1450 return ret;
1451 }
1452 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1453
1454
1455 /**
1456 * radix_tree_shrink - shrink height of a radix tree to minimal
1457 * @root radix tree root
1458 */
1459 static inline void radix_tree_shrink(struct radix_tree_root *root)
1460 {
1461 /* try to shrink tree height */
1462 while (root->height > 0) {
1463 struct radix_tree_node *to_free = root->rnode;
1464 void *newptr;
1465 int tag;
1466
1467 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1468 to_free = radix_tree_indirect_to_ptr(to_free);
1469
1470 /*
1471 * The candidate node has more than one child, or its child
1472 * is not at the leftmost slot, we cannot shrink.
1473 */
1474 if (to_free->count != 1)
1475 break;
1476 if (!to_free->slots[0])
1477 break;
1478
1479 /*
1480 * We don't need rcu_assign_pointer(), since we are simply
1481 * moving the node from one part of the tree to another. If
1482 * it was safe to dereference the old pointer to it
1483 * (to_free->slots[0]), it will be safe to dereference the new
1484 * one (root->rnode).
1485 */
1486 newptr = to_free->slots[0];
1487 if (root->height > 1)
1488 newptr = radix_tree_ptr_to_indirect(newptr);
1489 root->rnode = newptr;
1490 root->height--;
1491 /* must only free zeroed nodes into the slab */
1492 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1493 tag_clear(to_free, tag, 0);
1494 to_free->slots[0] = NULL;
1495 to_free->count = 0;
1496 }
1497 }
1498
1499 static
1500 int radix_valid_delete(struct radix_tree_node *node, int offset, int tag)
1501 {
1502 /*
1503 * we need to check for > 2, because nodes with a single child
1504 * can still be deleted, see radix_tree_shrink().
1505 */
1506 int unlock = (node->count > 2);
1507
1508 if (!unlock)
1509 return unlock;
1510
1511 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1512 if (!radix_valid_tag_clear(node, offset, tag)) {
1513 unlock = 0;
1514 break;
1515 }
1516 }
1517
1518 return unlock;
1519 }
1520
1521 /**
1522 * radix_tree_delete - delete an item from a radix tree
1523 * @root: radix tree root
1524 * @index: index key
1525 *
1526 * Remove the item at @index from the radix tree rooted at @root.
1527 *
1528 * Returns the address of the deleted item, or NULL if it was not present.
1529 */
1530 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1531 {
1532 /*
1533 * The radix tree path needs to be one longer than the maximum path
1534 * since the "list" is null terminated.
1535 */
1536 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
1537 struct radix_tree_path *punlock = path, *piter;
1538 struct radix_tree_node *slot = NULL;
1539 unsigned int height, shift;
1540 int tag;
1541 int offset;
1542 RADIX_TREE_CONTEXT(context, root);
1543
1544 slot = radix_optimistic_lock(context, index, 0, radix_valid_delete);
1545 if (slot) {
1546 height = slot->height;
1547 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1548 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1549 pathp->offset = offset;
1550 pathp->node = slot;
1551 radix_path_init(context, pathp);
1552 goto optimistic;
1553 }
1554
1555 pathp->node = NULL;
1556 radix_path_init(context, pathp);
1557
1558 height = root->height;
1559 if (index > radix_tree_maxindex(height))
1560 goto out;
1561
1562 slot = root->rnode;
1563 if (height == 0) {
1564 root_tag_clear_all(root);
1565 root->rnode = NULL;
1566 goto out;
1567 }
1568 slot = radix_tree_indirect_to_ptr(slot);
1569
1570 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1571
1572 do {
1573 if (slot == NULL)
1574 goto out;
1575
1576 pathp++;
1577 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1578 pathp->offset = offset;
1579 pathp->node = slot;
1580 radix_path_lock(context, pathp, slot);
1581
1582 if (radix_valid_delete(slot, offset, 0)) {
1583 for (; punlock < pathp; punlock++)
1584 radix_path_unlock(context, punlock);
1585 }
1586
1587 optimistic:
1588 slot = slot->slots[offset];
1589 shift -= RADIX_TREE_MAP_SHIFT;
1590 height--;
1591 } while (height > 0);
1592
1593 if (slot == NULL)
1594 goto out;
1595
1596 /*
1597 * Clear all tags associated with the just-deleted item
1598 */
1599 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1600 for (piter = pathp; piter >= punlock; piter--) {
1601 if (piter->node) {
1602 if (!tag_get(piter->node, tag, piter->offset))
1603 break;
1604 tag_clear(piter->node, tag, piter->offset);
1605 if (any_tag_set(piter->node, tag))
1606 break;
1607 } else {
1608 if (root_tag_get(root, tag))
1609 root_tag_clear(root, tag);
1610 }
1611 }
1612 }
1613
1614 /* Now unhook the nodes we do not need anymore */
1615 for (piter = pathp; piter >= punlock && piter->node; piter--) {
1616 piter->node->slots[piter->offset] = NULL;
1617 piter->node->count--;
1618
1619 if (piter->node->count) {
1620 if (piter->node ==
1621 radix_tree_indirect_to_ptr(root->rnode))
1622 radix_tree_shrink(root);
1623 goto out;
1624 }
1625 }
1626
1627 BUG_ON(piter->node);
1628
1629 root_tag_clear_all(root);
1630 root->height = 0;
1631 root->rnode = NULL;
1632
1633 out:
1634 for (; punlock <= pathp; punlock++) {
1635 radix_path_unlock(context, punlock);
1636 if (punlock->node && punlock->node->count == 0)
1637 radix_tree_node_free(punlock->node);
1638 }
1639 return slot;
1640 }
1641 EXPORT_SYMBOL(radix_tree_delete);
1642
1643 /**
1644 * radix_tree_tagged - test whether any items in the tree are tagged
1645 * @root: radix tree root
1646 * @tag: tag to test
1647 */
1648 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
1649 {
1650 return root_tag_get(root, tag);
1651 }
1652 EXPORT_SYMBOL(radix_tree_tagged);
1653
1654 static void
1655 radix_tree_node_ctor(struct kmem_cache *cachep, void *node)
1656 {
1657 memset(node, 0, sizeof(struct radix_tree_node));
1658 }
1659
1660 static __init unsigned long __maxindex(unsigned int height)
1661 {
1662 unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1663 int shift = RADIX_TREE_INDEX_BITS - width;
1664
1665 if (shift < 0)
1666 return ~0UL;
1667 if (shift >= BITS_PER_LONG)
1668 return 0UL;
1669 return ~0UL >> shift;
1670 }
1671
1672 static __init void radix_tree_init_maxindex(void)
1673 {
1674 unsigned int i;
1675
1676 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1677 height_to_maxindex[i] = __maxindex(i);
1678 }
1679
1680 static int radix_tree_callback(struct notifier_block *nfb,
1681 unsigned long action,
1682 void *hcpu)
1683 {
1684 int cpu = (long)hcpu;
1685 struct radix_tree_preload *rtp;
1686
1687 /* Free per-cpu pool of perloaded nodes */
1688 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1689 rtp = &per_cpu(radix_tree_preloads, cpu);
1690 while (rtp->nr) {
1691 kmem_cache_free(radix_tree_node_cachep,
1692 rtp->nodes[rtp->nr-1]);
1693 rtp->nodes[rtp->nr-1] = NULL;
1694 rtp->nr--;
1695 }
1696 }
1697 return NOTIFY_OK;
1698 }
1699
1700 void __init radix_tree_init(void)
1701 {
1702 radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1703 sizeof(struct radix_tree_node), 0,
1704 SLAB_PANIC, radix_tree_node_ctor);
1705 radix_tree_init_maxindex();
1706 hotcpu_notifier(radix_tree_callback, 0);
1707 }
1708
|
This page was automatically generated by the
LXR engine.
|