1 /*
2 * fs/dcache.c
3 *
4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds
7 */
8
9 /*
10 * Notes on the allocation strategy:
11 *
12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected.
15 */
16
17 #include <linux/syscalls.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/fs.h>
21 #include <linux/fsnotify.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/hash.h>
25 #include <linux/cache.h>
26 #include <linux/module.h>
27 #include <linux/mount.h>
28 #include <linux/file.h>
29 #include <asm/uaccess.h>
30 #include <linux/security.h>
31 #include <linux/seqlock.h>
32 #include <linux/swap.h>
33 #include <linux/bootmem.h>
34 #include "internal.h"
35
36
37 int sysctl_vfs_cache_pressure __read_mostly = 100;
38 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
39
40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
41 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
42
43 EXPORT_SYMBOL(dcache_lock);
44
45 static struct kmem_cache *dentry_cache __read_mostly;
46
47 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
48
49 /*
50 * This is the single most critical data structure when it comes
51 * to the dcache: the hashtable for lookups. Somebody should try
52 * to make this good - I've just made it work.
53 *
54 * This hash-function tries to avoid losing too many bits of hash
55 * information, yet avoid using a prime hash-size or similar.
56 */
57 #define D_HASHBITS d_hash_shift
58 #define D_HASHMASK d_hash_mask
59
60 static unsigned int d_hash_mask __read_mostly;
61 static unsigned int d_hash_shift __read_mostly;
62 static struct hlist_head *dentry_hashtable __read_mostly;
63 static LIST_HEAD(dentry_unused);
64
65 /* Statistics gathering. */
66 struct dentry_stat_t dentry_stat = {
67 .age_limit = 45,
68 };
69
70 static void __d_free(struct dentry *dentry)
71 {
72 if (dname_external(dentry))
73 kfree(dentry->d_name.name);
74 kmem_cache_free(dentry_cache, dentry);
75 }
76
77 static void d_callback(struct rcu_head *head)
78 {
79 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
80 __d_free(dentry);
81 }
82
83 /*
84 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
85 * inside dcache_lock.
86 */
87 static void d_free(struct dentry *dentry)
88 {
89 if (dentry->d_op && dentry->d_op->d_release)
90 dentry->d_op->d_release(dentry);
91 /* if dentry was never inserted into hash, immediate free is OK */
92 if (hlist_unhashed(&dentry->d_hash))
93 __d_free(dentry);
94 else
95 call_rcu(&dentry->d_u.d_rcu, d_callback);
96 }
97
98 static void dentry_lru_remove(struct dentry *dentry)
99 {
100 if (!list_empty(&dentry->d_lru)) {
101 list_del_init(&dentry->d_lru);
102 dentry_stat.nr_unused--;
103 }
104 }
105
106 /*
107 * Release the dentry's inode, using the filesystem
108 * d_iput() operation if defined.
109 * Called with dcache_lock and per dentry lock held, drops both.
110 */
111 static void dentry_iput(struct dentry * dentry)
112 {
113 struct inode *inode = dentry->d_inode;
114 if (inode) {
115 dentry->d_inode = NULL;
116 list_del_init(&dentry->d_alias);
117 spin_unlock(&dentry->d_lock);
118 spin_unlock(&dcache_lock);
119 if (!inode->i_nlink)
120 fsnotify_inoderemove(inode);
121 if (dentry->d_op && dentry->d_op->d_iput)
122 dentry->d_op->d_iput(dentry, inode);
123 else
124 iput(inode);
125 } else {
126 spin_unlock(&dentry->d_lock);
127 spin_unlock(&dcache_lock);
128 }
129 }
130
131 /**
132 * d_kill - kill dentry and return parent
133 * @dentry: dentry to kill
134 *
135 * Called with dcache_lock and d_lock, releases both. The dentry must
136 * already be unhashed and removed from the LRU.
137 *
138 * If this is the root of the dentry tree, return NULL.
139 */
140 static struct dentry *d_kill(struct dentry *dentry)
141 {
142 struct dentry *parent;
143
144 list_del(&dentry->d_u.d_child);
145 dentry_stat.nr_dentry--; /* For d_free, below */
146 /*drops the locks, at that point nobody can reach this dentry */
147 dentry_iput(dentry);
148 parent = dentry->d_parent;
149 d_free(dentry);
150 return dentry == parent ? NULL : parent;
151 }
152
153 /*
154 * This is dput
155 *
156 * This is complicated by the fact that we do not want to put
157 * dentries that are no longer on any hash chain on the unused
158 * list: we'd much rather just get rid of them immediately.
159 *
160 * However, that implies that we have to traverse the dentry
161 * tree upwards to the parents which might _also_ now be
162 * scheduled for deletion (it may have been only waiting for
163 * its last child to go away).
164 *
165 * This tail recursion is done by hand as we don't want to depend
166 * on the compiler to always get this right (gcc generally doesn't).
167 * Real recursion would eat up our stack space.
168 */
169
170 /*
171 * dput - release a dentry
172 * @dentry: dentry to release
173 *
174 * Release a dentry. This will drop the usage count and if appropriate
175 * call the dentry unlink method as well as removing it from the queues and
176 * releasing its resources. If the parent dentries were scheduled for release
177 * they too may now get deleted.
178 *
179 * no dcache lock, please.
180 */
181
182 void dput(struct dentry *dentry)
183 {
184 if (!dentry)
185 return;
186
187 repeat:
188 if (atomic_read(&dentry->d_count) == 1)
189 might_sleep();
190 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
191 return;
192
193 spin_lock(&dentry->d_lock);
194 if (atomic_read(&dentry->d_count)) {
195 spin_unlock(&dentry->d_lock);
196 spin_unlock(&dcache_lock);
197 return;
198 }
199
200 /*
201 * AV: ->d_delete() is _NOT_ allowed to block now.
202 */
203 if (dentry->d_op && dentry->d_op->d_delete) {
204 if (dentry->d_op->d_delete(dentry))
205 goto unhash_it;
206 }
207 /* Unreachable? Get rid of it */
208 if (d_unhashed(dentry))
209 goto kill_it;
210 if (list_empty(&dentry->d_lru)) {
211 dentry->d_flags |= DCACHE_REFERENCED;
212 list_add(&dentry->d_lru, &dentry_unused);
213 dentry_stat.nr_unused++;
214 }
215 spin_unlock(&dentry->d_lock);
216 spin_unlock(&dcache_lock);
217 return;
218
219 unhash_it:
220 __d_drop(dentry);
221 kill_it:
222 dentry_lru_remove(dentry);
223 dentry = d_kill(dentry);
224 if (dentry)
225 goto repeat;
226 }
227
228 /**
229 * d_invalidate - invalidate a dentry
230 * @dentry: dentry to invalidate
231 *
232 * Try to invalidate the dentry if it turns out to be
233 * possible. If there are other dentries that can be
234 * reached through this one we can't delete it and we
235 * return -EBUSY. On success we return 0.
236 *
237 * no dcache lock.
238 */
239
240 int d_invalidate(struct dentry * dentry)
241 {
242 /*
243 * If it's already been dropped, return OK.
244 */
245 spin_lock(&dcache_lock);
246 if (d_unhashed(dentry)) {
247 spin_unlock(&dcache_lock);
248 return 0;
249 }
250 /*
251 * Check whether to do a partial shrink_dcache
252 * to get rid of unused child entries.
253 */
254 if (!list_empty(&dentry->d_subdirs)) {
255 spin_unlock(&dcache_lock);
256 shrink_dcache_parent(dentry);
257 spin_lock(&dcache_lock);
258 }
259
260 /*
261 * Somebody else still using it?
262 *
263 * If it's a directory, we can't drop it
264 * for fear of somebody re-populating it
265 * with children (even though dropping it
266 * would make it unreachable from the root,
267 * we might still populate it if it was a
268 * working directory or similar).
269 */
270 spin_lock(&dentry->d_lock);
271 if (atomic_read(&dentry->d_count) > 1) {
272 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
273 spin_unlock(&dentry->d_lock);
274 spin_unlock(&dcache_lock);
275 return -EBUSY;
276 }
277 }
278
279 __d_drop(dentry);
280 spin_unlock(&dentry->d_lock);
281 spin_unlock(&dcache_lock);
282 return 0;
283 }
284
285 /* This should be called _only_ with dcache_lock held */
286
287 static inline struct dentry * __dget_locked(struct dentry *dentry)
288 {
289 atomic_inc(&dentry->d_count);
290 dentry_lru_remove(dentry);
291 return dentry;
292 }
293
/* Non-inline entry point for __dget_locked(); returns @dentry. */
struct dentry * dget_locked(struct dentry *dentry)
{
	struct dentry *held = __dget_locked(dentry);

	return held;
}
298
299 /**
300 * d_find_alias - grab a hashed alias of inode
301 * @inode: inode in question
302 * @want_discon: flag, used by d_splice_alias, to request
303 * that only a DISCONNECTED alias be returned.
304 *
305 * If inode has a hashed alias, or is a directory and has any alias,
306 * acquire the reference to alias and return it. Otherwise return NULL.
307 * Notice that if inode is a directory there can be only one alias and
308 * it can be unhashed only if it has no children, or if it is the root
309 * of a filesystem.
310 *
311 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
312 * any other hashed alias over that one unless @want_discon is set,
313 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
314 */
315
316 static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
317 {
318 struct list_head *head, *next, *tmp;
319 struct dentry *alias, *discon_alias=NULL;
320
321 head = &inode->i_dentry;
322 next = inode->i_dentry.next;
323 while (next != head) {
324 tmp = next;
325 next = tmp->next;
326 prefetch(next);
327 alias = list_entry(tmp, struct dentry, d_alias);
328 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
329 if (IS_ROOT(alias) &&
330 (alias->d_flags & DCACHE_DISCONNECTED))
331 discon_alias = alias;
332 else if (!want_discon) {
333 __dget_locked(alias);
334 return alias;
335 }
336 }
337 }
338 if (discon_alias)
339 __dget_locked(discon_alias);
340 return discon_alias;
341 }
342
343 struct dentry * d_find_alias(struct inode *inode)
344 {
345 struct dentry *de = NULL;
346
347 if (!list_empty(&inode->i_dentry)) {
348 spin_lock(&dcache_lock);
349 de = __d_find_alias(inode, 0);
350 spin_unlock(&dcache_lock);
351 }
352 return de;
353 }
354
355 /*
356 * Try to kill dentries associated with this inode.
357 * WARNING: you must own a reference to inode.
358 */
359 void d_prune_aliases(struct inode *inode)
360 {
361 struct dentry *dentry;
362 restart:
363 spin_lock(&dcache_lock);
364 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
365 spin_lock(&dentry->d_lock);
366 if (!atomic_read(&dentry->d_count)) {
367 __dget_locked(dentry);
368 __d_drop(dentry);
369 spin_unlock(&dentry->d_lock);
370 spin_unlock(&dcache_lock);
371 dput(dentry);
372 goto restart;
373 }
374 spin_unlock(&dentry->d_lock);
375 }
376 spin_unlock(&dcache_lock);
377 }
378
379 /*
380 * Throw away a dentry - free the inode, dput the parent. This requires that
381 * the LRU list has already been removed.
382 *
383 * Try to prune ancestors as well. This is necessary to prevent
384 * quadratic behavior of shrink_dcache_parent(), but is also expected
385 * to be beneficial in reducing dentry cache fragmentation.
386 *
387 * Called with dcache_lock, drops it and then regains.
388 * Called with dentry->d_lock held, drops it.
389 */
390 static void prune_one_dentry(struct dentry * dentry)
391 {
392 __d_drop(dentry);
393 dentry = d_kill(dentry);
394
395 /*
396 * Prune ancestors. Locking is simpler than in dput(),
397 * because dcache_lock needs to be taken anyway.
398 */
399 spin_lock(&dcache_lock);
400 while (dentry) {
401 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
402 return;
403
404 if (dentry->d_op && dentry->d_op->d_delete)
405 dentry->d_op->d_delete(dentry);
406 dentry_lru_remove(dentry);
407 __d_drop(dentry);
408 dentry = d_kill(dentry);
409 spin_lock(&dcache_lock);
410 }
411 }
412
413 /**
414 * prune_dcache - shrink the dcache
415 * @count: number of entries to try and free
416 * @sb: if given, ignore dentries for other superblocks
417 * which are being unmounted.
418 *
419 * Shrink the dcache. This is done when we need
420 * more memory, or simply when we need to unmount
421 * something (at which point we need to unuse
422 * all dentries).
423 *
424 * This function may fail to free any resources if
425 * all the dentries are in use.
426 */
427
428 static void prune_dcache(int count, struct super_block *sb)
429 {
430 spin_lock(&dcache_lock);
431 for (; count ; count--) {
432 struct dentry *dentry;
433 struct list_head *tmp;
434 struct rw_semaphore *s_umount;
435
436 cond_resched_lock(&dcache_lock);
437
438 tmp = dentry_unused.prev;
439 if (sb) {
440 /* Try to find a dentry for this sb, but don't try
441 * too hard, if they aren't near the tail they will
442 * be moved down again soon
443 */
444 int skip = count;
445 while (skip && tmp != &dentry_unused &&
446 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
447 skip--;
448 tmp = tmp->prev;
449 }
450 }
451 if (tmp == &dentry_unused)
452 break;
453 list_del_init(tmp);
454 prefetch(dentry_unused.prev);
455 dentry_stat.nr_unused--;
456 dentry = list_entry(tmp, struct dentry, d_lru);
457
458 spin_lock(&dentry->d_lock);
459 /*
460 * We found an inuse dentry which was not removed from
461 * dentry_unused because of laziness during lookup. Do not free
462 * it - just keep it off the dentry_unused list.
463 */
464 if (atomic_read(&dentry->d_count)) {
465 spin_unlock(&dentry->d_lock);
466 continue;
467 }
468 /* If the dentry was recently referenced, don't free it. */
469 if (dentry->d_flags & DCACHE_REFERENCED) {
470 dentry->d_flags &= ~DCACHE_REFERENCED;
471 list_add(&dentry->d_lru, &dentry_unused);
472 dentry_stat.nr_unused++;
473 spin_unlock(&dentry->d_lock);
474 continue;
475 }
476 /*
477 * If the dentry is not DCACHED_REFERENCED, it is time
478 * to remove it from the dcache, provided the super block is
479 * NULL (which means we are trying to reclaim memory)
480 * or this dentry belongs to the same super block that
481 * we want to shrink.
482 */
483 /*
484 * If this dentry is for "my" filesystem, then I can prune it
485 * without taking the s_umount lock (I already hold it).
486 */
487 if (sb && dentry->d_sb == sb) {
488 prune_one_dentry(dentry);
489 continue;
490 }
491 /*
492 * ...otherwise we need to be sure this filesystem isn't being
493 * unmounted, otherwise we could race with
494 * generic_shutdown_super(), and end up holding a reference to
495 * an inode while the filesystem is unmounted.
496 * So we try to get s_umount, and make sure s_root isn't NULL.
497 * (Take a local copy of s_umount to avoid a use-after-free of
498 * `dentry').
499 */
500 s_umount = &dentry->d_sb->s_umount;
501 if (down_read_trylock(s_umount)) {
502 if (dentry->d_sb->s_root != NULL) {
503 prune_one_dentry(dentry);
504 up_read(s_umount);
505 continue;
506 }
507 up_read(s_umount);
508 }
509 spin_unlock(&dentry->d_lock);
510 /*
511 * Insert dentry at the head of the list as inserting at the
512 * tail leads to a cycle.
513 */
514 list_add(&dentry->d_lru, &dentry_unused);
515 dentry_stat.nr_unused++;
516 }
517 spin_unlock(&dcache_lock);
518 }
519
520 /*
521 * Shrink the dcache for the specified super block.
522 * This allows us to unmount a device without disturbing
523 * the dcache for the other devices.
524 *
525 * This implementation makes just two traversals of the
526 * unused list. On the first pass we move the selected
527 * dentries to the most recent end, and on the second
528 * pass we free them. The second pass must restart after
529 * each dput(), but since the target dentries are all at
530 * the end, it's really just a single traversal.
531 */
532
533 /**
534 * shrink_dcache_sb - shrink dcache for a superblock
535 * @sb: superblock
536 *
537 * Shrink the dcache for the specified super block. This
538 * is used to free the dcache before unmounting a file
539 * system
540 */
541
542 void shrink_dcache_sb(struct super_block * sb)
543 {
544 struct list_head *tmp, *next;
545 struct dentry *dentry;
546
547 /*
548 * Pass one ... move the dentries for the specified
549 * superblock to the most recent end of the unused list.
550 */
551 spin_lock(&dcache_lock);
552 list_for_each_prev_safe(tmp, next, &dentry_unused) {
553 dentry = list_entry(tmp, struct dentry, d_lru);
554 if (dentry->d_sb != sb)
555 continue;
556 list_move_tail(tmp, &dentry_unused);
557 }
558
559 /*
560 * Pass two ... free the dentries for this superblock.
561 */
562 repeat:
563 list_for_each_prev_safe(tmp, next, &dentry_unused) {
564 dentry = list_entry(tmp, struct dentry, d_lru);
565 if (dentry->d_sb != sb)
566 continue;
567 dentry_stat.nr_unused--;
568 list_del_init(tmp);
569 spin_lock(&dentry->d_lock);
570 if (atomic_read(&dentry->d_count)) {
571 spin_unlock(&dentry->d_lock);
572 continue;
573 }
574 prune_one_dentry(dentry);
575 cond_resched_lock(&dcache_lock);
576 goto repeat;
577 }
578 spin_unlock(&dcache_lock);
579 }
580
581 /*
582 * destroy a single subtree of dentries for unmount
583 * - see the comments on shrink_dcache_for_umount() for a description of the
584 * locking
585 */
586 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
587 {
588 struct dentry *parent;
589 unsigned detached = 0;
590
591 BUG_ON(!IS_ROOT(dentry));
592
593 /* detach this root from the system */
594 spin_lock(&dcache_lock);
595 dentry_lru_remove(dentry);
596 __d_drop(dentry);
597 spin_unlock(&dcache_lock);
598
599 for (;;) {
600 /* descend to the first leaf in the current subtree */
601 while (!list_empty(&dentry->d_subdirs)) {
602 struct dentry *loop;
603
604 /* this is a branch with children - detach all of them
605 * from the system in one go */
606 spin_lock(&dcache_lock);
607 list_for_each_entry(loop, &dentry->d_subdirs,
608 d_u.d_child) {
609 dentry_lru_remove(loop);
610 __d_drop(loop);
611 cond_resched_lock(&dcache_lock);
612 }
613 spin_unlock(&dcache_lock);
614
615 /* move to the first child */
616 dentry = list_entry(dentry->d_subdirs.next,
617 struct dentry, d_u.d_child);
618 }
619
620 /* consume the dentries from this leaf up through its parents
621 * until we find one with children or run out altogether */
622 do {
623 struct inode *inode;
624
625 if (atomic_read(&dentry->d_count) != 0) {
626 printk(KERN_ERR
627 "BUG: Dentry %p{i=%lx,n=%s}"
628 " still in use (%d)"
629 " [unmount of %s %s]\n",
630 dentry,
631 dentry->d_inode ?
632 dentry->d_inode->i_ino : 0UL,
633 dentry->d_name.name,
634 atomic_read(&dentry->d_count),
635 dentry->d_sb->s_type->name,
636 dentry->d_sb->s_id);
637 BUG();
638 }
639
640 parent = dentry->d_parent;
641 if (parent == dentry)
642 parent = NULL;
643 else
644 atomic_dec(&parent->d_count);
645
646 list_del(&dentry->d_u.d_child);
647 detached++;
648
649 inode = dentry->d_inode;
650 if (inode) {
651 dentry->d_inode = NULL;
652 list_del_init(&dentry->d_alias);
653 if (dentry->d_op && dentry->d_op->d_iput)
654 dentry->d_op->d_iput(dentry, inode);
655 else
656 iput(inode);
657 }
658
659 d_free(dentry);
660
661 /* finished when we fall off the top of the tree,
662 * otherwise we ascend to the parent and move to the
663 * next sibling if there is one */
664 if (!parent)
665 goto out;
666
667 dentry = parent;
668
669 } while (list_empty(&dentry->d_subdirs));
670
671 dentry = list_entry(dentry->d_subdirs.next,
672 struct dentry, d_u.d_child);
673 }
674 out:
675 /* several dentries were freed, need to correct nr_dentry */
676 spin_lock(&dcache_lock);
677 dentry_stat.nr_dentry -= detached;
678 spin_unlock(&dcache_lock);
679 }
680
681 /*
682 * destroy the dentries attached to a superblock on unmounting
683 * - we don't need to use dentry->d_lock, and only need dcache_lock when
684 * removing the dentry from the system lists and hashes because:
685 * - the superblock is detached from all mountings and open files, so the
686 * dentry trees will not be rearranged by the VFS
687 * - s_umount is write-locked, so the memory pressure shrinker will ignore
688 * any dentries belonging to this superblock that it comes across
689 * - the filesystem itself is no longer permitted to rearrange the dentries
690 * in this superblock
691 */
692 void shrink_dcache_for_umount(struct super_block *sb)
693 {
694 struct dentry *dentry;
695
696 // -rt: this might succeed there ...
697 // if (down_read_trylock(&sb->s_umount))
698 // BUG();
699
700 dentry = sb->s_root;
701 sb->s_root = NULL;
702 atomic_dec(&dentry->d_count);
703 shrink_dcache_for_umount_subtree(dentry);
704
705 while (!hlist_empty(&sb->s_anon)) {
706 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
707 shrink_dcache_for_umount_subtree(dentry);
708 }
709 }
710
711 /*
712 * Search for at least 1 mount point in the dentry's subdirs.
713 * We descend to the next level whenever the d_subdirs
714 * list is non-empty and continue searching.
715 */
716
717 /**
718 * have_submounts - check for mounts over a dentry
719 * @parent: dentry to check.
720 *
721 * Return true if the parent or its subdirectories contain
722 * a mount point
723 */
724
725 int have_submounts(struct dentry *parent)
726 {
727 struct dentry *this_parent = parent;
728 struct list_head *next;
729
730 spin_lock(&dcache_lock);
731 if (d_mountpoint(parent))
732 goto positive;
733 repeat:
734 next = this_parent->d_subdirs.next;
735 resume:
736 while (next != &this_parent->d_subdirs) {
737 struct list_head *tmp = next;
738 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
739 next = tmp->next;
740 /* Have we found a mount point ? */
741 if (d_mountpoint(dentry))
742 goto positive;
743 if (!list_empty(&dentry->d_subdirs)) {
744 this_parent = dentry;
745 goto repeat;
746 }
747 }
748 /*
749 * All done at this level ... ascend and resume the search.
750 */
751 if (this_parent != parent) {
752 next = this_parent->d_u.d_child.next;
753 this_parent = this_parent->d_parent;
754 goto resume;
755 }
756 spin_unlock(&dcache_lock);
757 return 0; /* No mount points found in tree */
758 positive:
759 spin_unlock(&dcache_lock);
760 return 1;
761 }
762
763 /*
764 * Search the dentry child list for the specified parent,
765 * and move any unused dentries to the end of the unused
766 * list for prune_dcache(). We descend to the next level
767 * whenever the d_subdirs list is non-empty and continue
768 * searching.
769 *
770 * It returns zero iff there are no unused children,
771 * otherwise it returns the number of children moved to
772 * the end of the unused list. This may not be the total
773 * number of unused children, because select_parent can
774 * drop the lock and return early due to latency
775 * constraints.
776 */
777 static int select_parent(struct dentry * parent)
778 {
779 struct dentry *this_parent = parent;
780 struct list_head *next;
781 int found = 0;
782
783 spin_lock(&dcache_lock);
784 repeat:
785 next = this_parent->d_subdirs.next;
786 resume:
787 while (next != &this_parent->d_subdirs) {
788 struct list_head *tmp = next;
789 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
790 next = tmp->next;
791
792 dentry_lru_remove(dentry);
793 /*
794 * move only zero ref count dentries to the end
795 * of the unused list for prune_dcache
796 */
797 if (!atomic_read(&dentry->d_count)) {
798 list_add_tail(&dentry->d_lru, &dentry_unused);
799 dentry_stat.nr_unused++;
800 found++;
801 }
802
803 /*
804 * We can return to the caller if we have found some (this
805 * ensures forward progress). We'll be coming back to find
806 * the rest.
807 */
808 if (found && need_resched())
809 goto out;
810
811 /*
812 * Descend a level if the d_subdirs list is non-empty.
813 */
814 if (!list_empty(&dentry->d_subdirs)) {
815 this_parent = dentry;
816 goto repeat;
817 }
818 }
819 /*
820 * All done at this level ... ascend and resume the search.
821 */
822 if (this_parent != parent) {
823 next = this_parent->d_u.d_child.next;
824 this_parent = this_parent->d_parent;
825 goto resume;
826 }
827 out:
828 spin_unlock(&dcache_lock);
829 return found;
830 }
831
832 /**
833 * shrink_dcache_parent - prune dcache
834 * @parent: parent of entries to prune
835 *
836 * Prune the dcache to remove unused children of the parent dentry.
837 */
838
839 void shrink_dcache_parent(struct dentry * parent)
840 {
841 int found;
842
843 while ((found = select_parent(parent)) != 0)
844 prune_dcache(found, parent->d_sb);
845 }
846
847 /*
848 * Scan `nr' dentries and return the number which remain.
849 *
850 * We need to avoid reentering the filesystem if the caller is performing a
851 * GFP_NOFS allocation attempt. One example deadlock is:
852 *
853 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
854 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
855 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
856 *
857 * In this case we return -1 to tell the caller that we baled.
858 */
859 static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
860 {
861 if (nr) {
862 if (!(gfp_mask & __GFP_FS))
863 return -1;
864 prune_dcache(nr, NULL);
865 }
866 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
867 }
868
869 static struct shrinker dcache_shrinker = {
870 .shrink = shrink_dcache_memory,
871 .seeks = DEFAULT_SEEKS,
872 };
873
874 /**
875 * d_alloc - allocate a dcache entry
876 * @parent: parent of entry to allocate
877 * @name: qstr of the name
878 *
879 * Allocates a dentry. It returns %NULL if there is insufficient memory
880 * available. On a success the dentry is returned. The name passed in is
881 * copied and the copy passed in may be reused after this call.
882 */
883
884 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
885 {
886 struct dentry *dentry;
887 char *dname;
888
889 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
890 if (!dentry)
891 return NULL;
892
893 if (name->len > DNAME_INLINE_LEN-1) {
894 dname = kmalloc(name->len + 1, GFP_KERNEL);
895 if (!dname) {
896 kmem_cache_free(dentry_cache, dentry);
897 return NULL;
898 }
899 } else {
900 dname = dentry->d_iname;
901 }
902 dentry->d_name.name = dname;
903
904 dentry->d_name.len = name->len;
905 dentry->d_name.hash = name->hash;
906 memcpy(dname, name->name, name->len);
907 dname[name->len] = 0;
908
909 atomic_set(&dentry->d_count, 1);
910 dentry->d_flags = DCACHE_UNHASHED;
911 spin_lock_init(&dentry->d_lock);
912 dentry->d_inode = NULL;
913 dentry->d_parent = NULL;
914 dentry->d_sb = NULL;
915 dentry->d_op = NULL;
916 dentry->d_fsdata = NULL;
917 dentry->d_mounted = 0;
918 #ifdef CONFIG_PROFILING
919 dentry->d_cookie = NULL;
920 #endif
921 INIT_HLIST_NODE(&dentry->d_hash);
922 INIT_LIST_HEAD(&dentry->d_lru);
923 INIT_LIST_HEAD(&dentry->d_subdirs);
924 INIT_LIST_HEAD(&dentry->d_alias);
925
926 if (parent) {
927 dentry->d_parent = dget(parent);
928 dentry->d_sb = parent->d_sb;
929 } else {
930 INIT_LIST_HEAD(&dentry->d_u.d_child);
931 }
932
933 spin_lock(&dcache_lock);
934 if (parent)
935 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
936 dentry_stat.nr_dentry++;
937 spin_unlock(&dcache_lock);
938
939 return dentry;
940 }
941
942 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
943 {
944 struct qstr q;
945
946 q.name = name;
947 q.len = strlen(name);
948 q.hash = full_name_hash(q.name, q.len);
949 return d_alloc(parent, &q);
950 }
951
952 /**
953 * d_instantiate - fill in inode information for a dentry
954 * @entry: dentry to complete
955 * @inode: inode to attach to this dentry
956 *
957 * Fill in inode information in the entry.
958 *
959 * This turns negative dentries into productive full members
960 * of society.
961 *
962 * NOTE! This assumes that the inode count has been incremented
963 * (or otherwise set) by the caller to indicate that it is now
964 * in use by the dcache.
965 */
966
967 void d_instantiate(struct dentry *entry, struct inode * inode)
968 {
969 BUG_ON(!list_empty(&entry->d_alias));
970 spin_lock(&dcache_lock);
971 if (inode)
972 list_add(&entry->d_alias, &inode->i_dentry);
973 entry->d_inode = inode;
974 fsnotify_d_instantiate(entry, inode);
975 spin_unlock(&dcache_lock);
976 security_d_instantiate(entry, inode);
977 }
978
979 /**
980 * d_instantiate_unique - instantiate a non-aliased dentry
981 * @entry: dentry to instantiate
982 * @inode: inode to attach to this dentry
983 *
984 * Fill in inode information in the entry. On success, it returns NULL.
985 * If an unhashed alias of "entry" already exists, then we return the
986 * aliased dentry instead and drop one reference to inode.
987 *
988 * Note that in order to avoid conflicts with rename() etc, the caller
989 * had better be holding the parent directory semaphore.
990 *
991 * This also assumes that the inode count has been incremented
992 * (or otherwise set) by the caller to indicate that it is now
993 * in use by the dcache.
994 */
995 static struct dentry *__d_instantiate_unique(struct dentry *entry,
996 struct inode *inode)
997 {
998 struct dentry *alias;
999 int len = entry->d_name.len;
1000 const char *name = entry->d_name.name;
1001 unsigned int hash = entry->d_name.hash;
1002
1003 if (!inode) {
1004 entry->d_inode = NULL;
1005 return NULL;
1006 }
1007
1008 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1009 struct qstr *qstr = &alias->d_name;
1010
1011 if (qstr->hash != hash)
1012 continue;
1013 if (alias->d_parent != entry->d_parent)
1014 continue;
1015 if (qstr->len != len)
1016 continue;
1017 if (memcmp(qstr->name, name, len))
1018 continue;
1019 dget_locked(alias);
1020 return alias;
1021 }
1022
1023 list_add(&entry->d_alias, &inode->i_dentry);
1024 entry->d_inode = inode;
1025 fsnotify_d_instantiate(entry, inode);
1026 return NULL;
1027 }
1028
1029 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1030 {
1031 struct dentry *result;
1032
1033 BUG_ON(!list_empty(&entry->d_alias));
1034
1035 spin_lock(&dcache_lock);
1036 result = __d_instantiate_unique(entry, inode);
1037 spin_unlock(&dcache_lock);
1038
1039 if (!result) {
1040 security_d_instantiate(entry, inode);
1041 return NULL;
1042 }
1043
1044 BUG_ON(!d_unhashed(result));
1045 iput(inode);
1046 return result;
1047 }
1048
1049 EXPORT_SYMBOL(d_instantiate_unique);
1050
1051 /**
1052 * d_alloc_root - allocate root dentry
1053 * @root_inode: inode to allocate the root for
1054 *
1055 * Allocate a root ("/") dentry for the inode given. The inode is
1056 * instantiated and returned. %NULL is returned if there is insufficient
1057 * memory or the inode passed is %NULL.
1058 */
1059
1060 struct dentry * d_alloc_root(struct inode * root_inode)
1061 {
1062 struct dentry *res = NULL;
1063
1064 if (root_inode) {
1065 static const struct qstr name = { .name = "/", .len = 1 };
1066
1067 res = d_alloc(NULL, &name);
1068 if (res) {
1069 res->d_sb = root_inode->i_sb;
1070 res->d_parent = res;
1071 d_instantiate(res, root_inode);
1072 }
1073 }
1074 return res;
1075 }
1076
1077 static inline struct hlist_head *d_hash(struct dentry *parent,
1078 unsigned long hash)
1079 {
1080 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1081 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1082 return dentry_hashtable + (hash & D_HASHMASK);
1083 }
1084
1085 /**
1086 * d_alloc_anon - allocate an anonymous dentry
1087 * @inode: inode to allocate the dentry for
1088 *
1089 * This is similar to d_alloc_root. It is used by filesystems when
1090 * creating a dentry for a given inode, often in the process of
1091 * mapping a filehandle to a dentry. The returned dentry may be
1092 * anonymous, or may have a full name (if the inode was already
1093 * in the cache). The file system may need to make further
1094 * efforts to connect this dentry into the dcache properly.
1095 *
1096 * When called on a directory inode, we must ensure that
1097 * the inode only ever has one dentry. If a dentry is
1098 * found, that is returned instead of allocating a new one.
1099 *
1100 * On successful return, the reference to the inode has been transferred
1101 * to the dentry. If %NULL is returned (indicating kmalloc failure),
1102 * the reference on the inode has not been released.
1103 */
1104
1105 struct dentry * d_alloc_anon(struct inode *inode)
1106 {
1107 static const struct qstr anonstring = { .name = "" };
1108 struct dentry *tmp;
1109 struct dentry *res;
1110
1111 if ((res = d_find_alias(inode))) {
1112 iput(inode);
1113 return res;
1114 }
1115
1116 tmp = d_alloc(NULL, &anonstring);
1117 if (!tmp)
1118 return NULL;
1119
1120 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1121
1122 spin_lock(&dcache_lock);
1123 res = __d_find_alias(inode, 0);
1124 if (!res) {
1125 /* attach a disconnected dentry */
1126 res = tmp;
1127 tmp = NULL;
1128 spin_lock(&res->d_lock);
1129 res->d_sb = inode->i_sb;
1130 res->d_parent = res;
1131 res->d_inode = inode;
1132 res->d_flags |= DCACHE_DISCONNECTED;
1133 res->d_flags &= ~DCACHE_UNHASHED;
1134 list_add(&res->d_alias, &inode->i_dentry);
1135 hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
1136 spin_unlock(&res->d_lock);
1137
1138 inode = NULL; /* don't drop reference */
1139 }
1140 spin_unlock(&dcache_lock);
1141
1142 if (inode)
1143 iput(inode);
1144 if (tmp)
1145 dput(tmp);
1146 return res;
1147 }
1148
1149
1150 /**
1151 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1152 * @inode: the inode which may have a disconnected dentry
1153 * @dentry: a negative dentry which we want to point to the inode.
1154 *
1155 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1156 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1157 * and return it, else simply d_add the inode to the dentry and return NULL.
1158 *
1159 * This is needed in the lookup routine of any filesystem that is exportable
1160 * (via knfsd) so that we can build dcache paths to directories effectively.
1161 *
1162 * If a dentry was found and moved, then it is returned. Otherwise NULL
1163 * is returned. This matches the expected return value of ->lookup.
1164 *
1165 */
1166 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1167 {
1168 struct dentry *new = NULL;
1169
1170 if (inode && S_ISDIR(inode->i_mode)) {
1171 spin_lock(&dcache_lock);
1172 new = __d_find_alias(inode, 1);
1173 if (new) {
1174 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1175 fsnotify_d_instantiate(new, inode);
1176 spin_unlock(&dcache_lock);
1177 security_d_instantiate(new, inode);
1178 d_rehash(dentry);
1179 d_move(new, dentry);
1180 iput(inode);
1181 } else {
1182 /* d_instantiate takes dcache_lock, so we do it by hand */
1183 list_add(&dentry->d_alias, &inode->i_dentry);
1184 dentry->d_inode = inode;
1185 fsnotify_d_instantiate(dentry, inode);
1186 spin_unlock(&dcache_lock);
1187 security_d_instantiate(dentry, inode);
1188 d_rehash(dentry);
1189 }
1190 } else
1191 d_add(dentry, inode);
1192 return new;
1193 }
1194
1195
1196 /**
1197 * d_lookup - search for a dentry
1198 * @parent: parent dentry
1199 * @name: qstr of name we wish to find
1200 *
1201 * Searches the children of the parent dentry for the name in question. If
1202 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use dput() to free the entry when it has
1204 * finished using it. %NULL is returned on failure.
1205 *
1206 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1207 * Memory barriers are used while updating and doing lockless traversal.
1208 * To avoid races with d_move while rename is happening, d_lock is used.
1209 *
1210 * Overflows in memcmp(), while d_move, are avoided by keeping the length
1211 * and name pointer in one structure pointed by d_qstr.
1212 *
1213 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1214 * lookup is going on.
1215 *
1216 * dentry_unused list is not updated even if lookup finds the required dentry
1217 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1218 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1219 * acquisition.
1220 *
 * d_lookup() is protected against concurrent renames in some unrelated
 * directory by the seqlock_t rename_lock.
1223 */
1224
1225 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1226 {
1227 struct dentry * dentry = NULL;
1228 unsigned long seq;
1229
1230 do {
1231 seq = read_seqbegin(&rename_lock);
1232 dentry = __d_lookup(parent, name);
1233 if (dentry)
1234 break;
1235 } while (read_seqretry(&rename_lock, seq));
1236 return dentry;
1237 }
1238
/*
 * __d_lookup - search one hash chain for a child of @parent named @name
 *
 * Walks the bucket selected by d_hash(parent, name->hash) under
 * rcu_read_lock().  Candidates are filtered on hash and parent without
 * taking any lock, then rechecked under the candidate's d_lock before the
 * name is compared (through the parent's d_compare operation when one is
 * set, otherwise by length and memcmp()).
 *
 * Returns the matching dentry with d_count incremented, or NULL.  The walk
 * stops at the first name match: if that dentry turns out to be unhashed,
 * NULL is returned without looking further down the chain.
 */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_head *head = d_hash(parent,hash);
	struct dentry *found = NULL;
	struct hlist_node *node;
	struct dentry *dentry;

	rcu_read_lock();

	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
		struct qstr *qstr;

		/* Cheap unlocked pre-filter: skip obvious non-matches. */
		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;

		spin_lock(&dentry->d_lock);

		/*
		 * Recheck the dentry after taking the lock - d_move may have
		 * changed things. Don't bother checking the hash because we're
		 * about to compare the whole name anyway.
		 */
		if (dentry->d_parent != parent)
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		qstr = &dentry->d_name;
		if (parent->d_op && parent->d_op->d_compare) {
			/* A non-zero result from d_compare means "no match". */
			if (parent->d_op->d_compare(parent, qstr, name))
				goto next;
		} else {
			if (qstr->len != len)
				goto next;
			if (memcmp(qstr->name, str, len))
				goto next;
		}

		/* Name matched: take a reference unless it has been unhashed. */
		if (!d_unhashed(dentry)) {
			atomic_inc(&dentry->d_count);
			found = dentry;
		}
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}
1297
1298 /**
1299 * d_hash_and_lookup - hash the qstr then search for a dentry
1300 * @dir: Directory to search in
1301 * @name: qstr of name we wish to find
1302 *
1303 * On hash failure or on lookup failure NULL is returned.
1304 */
1305 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1306 {
1307 struct dentry *dentry = NULL;
1308
1309 /*
1310 * Check for a fs-specific hash function. Note that we must
1311 * calculate the standard hash first, as the d_op->d_hash()
1312 * routine may choose to leave the hash value unchanged.
1313 */
1314 name->hash = full_name_hash(name->name, name->len);
1315 if (dir->d_op && dir->d_op->d_hash) {
1316 if (dir->d_op->d_hash(dir, name) < 0)
1317 goto out;
1318 }
1319 dentry = d_lookup(dir, name);
1320 out:
1321 return dentry;
1322 }
1323
1324 /**
1325 * d_validate - verify dentry provided from insecure source
1326 * @dentry: The dentry alleged to be valid child of @dparent
1327 * @dparent: The parent dentry (known to be valid)
1328 * @hash: Hash of the dentry
1329 * @len: Length of the name
1330 *
1331 * An insecure source has sent us a dentry, here we verify it and dget() it.
1332 * This is used by ncpfs in its readdir implementation.
1333 * Zero is returned in the dentry is invalid.
1334 */
1335
1336 int d_validate(struct dentry *dentry, struct dentry *dparent)
1337 {
1338 struct hlist_head *base;
1339 struct hlist_node *lhp;
1340
1341 /* Check whether the ptr might be valid at all.. */
1342 if (!kmem_ptr_validate(dentry_cache, dentry))
1343 goto out;
1344
1345 if (dentry->d_parent != dparent)
1346 goto out;
1347
1348 spin_lock(&dcache_lock);
1349 base = d_hash(dparent, dentry->d_name.hash);
1350 hlist_for_each(lhp,base) {
1351 /* hlist_for_each_entry_rcu() not required for d_hash list
1352 * as it is parsed under dcache_lock
1353 */
1354 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1355 __dget_locked(dentry);
1356 spin_unlock(&dcache_lock);
1357 return 1;
1358 }
1359 }
1360 spin_unlock(&dcache_lock);
1361 out:
1362 return 0;
1363 }
1364
1365 /*
1366 * When a file is deleted, we have two options:
1367 * - turn this dentry into a negative dentry
1368 * - unhash this dentry and free it.
1369 *
1370 * Usually, we want to just turn this into
1371 * a negative dentry, but if anybody else is
1372 * currently using the dentry or the inode
1373 * we can't do that and we fall back on removing
1374 * it from the hash queues and waiting for
1375 * it to be deleted later when it has no users
1376 */
1377
1378 /**
1379 * d_delete - delete a dentry
1380 * @dentry: The dentry to delete
1381 *
1382 * Turn the dentry into a negative dentry if possible, otherwise
1383 * remove it from the hash queues so it can be deleted later
1384 */
1385
1386 void d_delete(struct dentry * dentry)
1387 {
1388 int isdir = 0;
1389 /*
1390 * Are we the only user?
1391 */
1392 spin_lock(&dcache_lock);
1393 spin_lock(&dentry->d_lock);
1394 isdir = S_ISDIR(dentry->d_inode->i_mode);
1395 if (atomic_read(&dentry->d_count) == 1) {
1396 dentry_iput(dentry);
1397 fsnotify_nameremove(dentry, isdir);
1398 return;
1399 }
1400
1401 if (!d_unhashed(dentry))
1402 __d_drop(dentry);
1403
1404 spin_unlock(&dentry->d_lock);
1405 spin_unlock(&dcache_lock);
1406
1407 fsnotify_nameremove(dentry, isdir);
1408 }
1409
1410 static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1411 {
1412
1413 entry->d_flags &= ~DCACHE_UNHASHED;
1414 hlist_add_head_rcu(&entry->d_hash, list);
1415 }
1416
1417 static void _d_rehash(struct dentry * entry)
1418 {
1419 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1420 }
1421
1422 /**
1423 * d_rehash - add an entry back to the hash
1424 * @entry: dentry to add to the hash
1425 *
1426 * Adds a dentry to the hash according to its name.
1427 */
1428
1429 void d_rehash(struct dentry * entry)
1430 {
1431 spin_lock(&dcache_lock);
1432 spin_lock(&entry->d_lock);
1433 _d_rehash(entry);
1434 spin_unlock(&entry->d_lock);
1435 spin_unlock(&dcache_lock);
1436 }
1437
/* Exchange the values of two lvalues of the same type. */
#define do_switch(x,y) do { \
	__typeof__ (x) __tmp = (x); \
	(x) = (y); \
	(y) = __tmp; \
} while (0)
1441
1442 /*
1443 * When switching names, the actual string doesn't strictly have to
1444 * be preserved in the target - because we're dropping the target
1445 * anyway. As such, we can just do a simple memcpy() to copy over
1446 * the new name before we switch.
1447 *
1448 * Note that we have to be a lot more careful about getting the hash
1449 * switched - we have to switch the hash value properly even if it
1450 * then no longer matches the actual (corrupted) string of the target.
1451 * The hash value has to match the hash queue that the dentry is on..
1452 */
1453 static void switch_names(struct dentry *dentry, struct dentry *target)
1454 {
1455 if (dname_external(target)) {
1456 if (dname_external(dentry)) {
1457 /*
1458 * Both external: swap the pointers
1459 */
1460 do_switch(target->d_name.name, dentry->d_name.name);
1461 } else {
1462 /*
1463 * dentry:internal, target:external. Steal target's
1464 * storage and make target internal.
1465 */
1466 memcpy(target->d_iname, dentry->d_name.name,
1467 dentry->d_name.len + 1);
1468 dentry->d_name.name = target->d_name.name;
1469 target->d_name.name = target->d_iname;
1470 }
1471 } else {
1472 if (dname_external(dentry)) {
1473 /*
1474 * dentry:external, target:internal. Give dentry's
1475 * storage to target and make dentry internal
1476 */
1477 memcpy(dentry->d_iname, target->d_name.name,
1478 target->d_name.len + 1);
1479 target->d_name.name = dentry->d_name.name;
1480 dentry->d_name.name = dentry->d_iname;
1481 } else {
1482 /*
1483 * Both are internal. Just copy target to dentry
1484 */
1485 memcpy(dentry->d_iname, target->d_name.name,
1486 target->d_name.len + 1);
1487 }
1488 }
1489 }
1490
1491 /*
1492 * We cannibalize "target" when moving dentry on top of it,
1493 * because it's going to be thrown away anyway. We could be more
1494 * polite about it, though.
1495 *
1496 * This forceful removal will result in ugly /proc output if
1497 * somebody holds a file open that got deleted due to a rename.
1498 * We could be nicer about the deleted file, and let it show
1499 * up under the name it had before it was deleted rather than
1500 * under the original name of the file that was moved on top of it.
1501 */
1502
1503 /*
1504 * d_move_locked - move a dentry
1505 * @dentry: entry to move
1506 * @target: new dentry
1507 *
1508 * Update the dcache to reflect the move of a file name. Negative
1509 * dcache entries should not be moved in this way.
1510 */
/*
 * Both callers in this file (d_move() and __d_unalias()) invoke this with
 * dcache_lock held.  The whole operation runs inside a rename_lock write
 * section so that seqlock readers (d_lookup(), is_subdir()) can detect it.
 */
static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
	struct hlist_head *list;

	/* Only warns; the move still goes ahead for a negative dentry. */
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	/* The two d_locks are always taken in address order, lowest first. */
	if (target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	if (d_unhashed(dentry))
		goto already_unhashed;

	hlist_del_rcu(&dentry->d_hash);

already_unhashed:
	/* Hash dentry into the bucket that target's parent/name select. */
	list = d_hash(target->d_parent, target->d_name.hash);
	__d_rehash(dentry, list);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	/* Detach both from their parents' child lists before re-parenting. */
	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		/*
		 * dentry had no parent to hand over: it adopts target's
		 * parent, and target becomes a root with an empty child link.
		 */
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		do_switch(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	/* Notification is issued while dentry->d_lock is still held. */
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}
1569
1570 /**
1571 * d_move - move a dentry
1572 * @dentry: entry to move
1573 * @target: new dentry
1574 *
1575 * Update the dcache to reflect the move of a file name. Negative
1576 * dcache entries should not be moved in this way.
1577 */
1578
1579 void d_move(struct dentry * dentry, struct dentry * target)
1580 {
1581 spin_lock(&dcache_lock);
1582 d_move_locked(dentry, target);
1583 spin_unlock(&dcache_lock);
1584 }
1585
1586 /*
1587 * Helper that returns 1 if p1 is a parent of p2, else 0
1588 */
1589 static int d_isparent(struct dentry *p1, struct dentry *p2)
1590 {
1591 struct dentry *p;
1592
1593 for (p = p2; p->d_parent != p; p = p->d_parent) {
1594 if (p->d_parent == p1)
1595 return 1;
1596 }
1597 return 0;
1598 }
1599
1600 /*
1601 * This helper attempts to cope with remotely renamed directories
1602 *
1603 * It assumes that the caller is already holding
1604 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
1605 *
1606 * Note: If ever the locking in lock_rename() changes, then please
1607 * remember to update this too...
1608 *
1609 * On return, dcache_lock will have been unlocked.
1610 */
1611 static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1612 {
1613 struct mutex *m1 = NULL, *m2 = NULL;
1614 struct dentry *ret;
1615
1616 /* If alias and dentry share a parent, then no extra locks required */
1617 if (alias->d_parent == dentry->d_parent)
1618 goto out_unalias;
1619
1620 /* Check for loops */
1621 ret = ERR_PTR(-ELOOP);
1622 if (d_isparent(alias, dentry))
1623 goto out_err;
1624
1625 /* See lock_rename() */
1626 ret = ERR_PTR(-EBUSY);
1627 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
1628 goto out_err;
1629 m1 = &dentry->d_sb->s_vfs_rename_mutex;
1630 if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
1631 goto out_err;
1632 m2 = &alias->d_parent->d_inode->i_mutex;
1633 out_unalias:
1634 d_move_locked(alias, dentry);
1635 ret = alias;
1636 out_err:
1637 spin_unlock(&dcache_lock);
1638 if (m2)
1639 mutex_unlock(m2);
1640 if (m1)
1641 mutex_unlock(m1);
1642 return ret;
1643 }
1644
1645 /*
1646 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1647 * named dentry in place of the dentry to be replaced.
1648 */
1649 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1650 {
1651 struct dentry *dparent, *aparent;
1652
1653 switch_names(dentry, anon);
1654 do_switch(dentry->d_name.len, anon->d_name.len);
1655 do_switch(dentry->d_name.hash, anon->d_name.hash);
1656
1657 dparent = dentry->d_parent;
1658 aparent = anon->d_parent;
1659
1660 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1661 list_del(&dentry->d_u.d_child);
1662 if (!IS_ROOT(dentry))
1663 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1664 else
1665 INIT_LIST_HEAD(&dentry->d_u.d_child);
1666
1667 anon->d_parent = (dparent == dentry) ? anon : dparent;
1668 list_del(&anon->d_u.d_child);
1669 if (!IS_ROOT(anon))
1670 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
1671 else
1672 INIT_LIST_HEAD(&anon->d_u.d_child);
1673
1674 anon->d_flags &= ~DCACHE_DISCONNECTED;
1675 }
1676
1677 /**
1678 * d_materialise_unique - introduce an inode into the tree
1679 * @dentry: candidate dentry
1680 * @inode: inode to bind to the dentry, to which aliases may be attached
1681 *
1682 * Introduces an dentry into the tree, substituting an extant disconnected
1683 * root directory alias in its place if there is one
1684 */
/*
 * Return value: NULL when @dentry itself ended up being used, otherwise
 * the dentry that was used in its place (in which case the reference on
 * @inode is dropped with iput()), or the ERR_PTR value produced by
 * __d_unalias().
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	/* The candidate must not be on a hash chain yet. */
	BUG_ON(!d_unhashed(dentry));

	spin_lock(&dcache_lock);

	/* No inode: hash @dentry as a negative dentry. */
	if (!inode) {
		actual = dentry;
		dentry->d_inode = NULL;
		goto found_lock;
	}

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				/*
				 * alias->d_lock is taken here and released
				 * after the rehash at "found" (actual == alias).
				 */
				spin_lock(&alias->d_lock);
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			/* __d_unalias() releases dcache_lock before returning. */
			actual = __d_unalias(dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	/*
	 * NOTE(review): __d_instantiate_unique() is defined outside this
	 * chunk; from its use here it appears to return NULL when @dentry
	 * was instantiated, or an existing dentry otherwise - confirm.
	 */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else if (unlikely(!d_unhashed(actual)))
		goto shouldnt_be_hashed;

found_lock:
	spin_lock(&actual->d_lock);
found:
	/* Reached with dcache_lock and actual->d_lock both held. */
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&dcache_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	/* Another dentry was used (or an error occurred): drop the inode ref. */
	iput(inode);
	return actual;

shouldnt_be_hashed:
	spin_unlock(&dcache_lock);
	BUG();
	goto shouldnt_be_hashed;
}
1749
/**
 * __d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root path (dentry and vfsmnt) at which the walk stops
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * The path is assembled backwards from the end of @buffer. Returns a
 * pointer to the start of the path inside @buffer, or
 * ERR_PTR(-ENAMETOOLONG) if the path was too long.
 *
 * "buflen" should be positive. Caller holds the dcache_lock.
 */
static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		      struct path *root, char *buffer, int buflen)
{
	char * end = buffer+buflen;
	char * retval;
	int namelen;

	/* Terminating NUL goes in the last byte of the buffer. */
	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
		/* 10 == strlen(" (deleted)"); copied without a NUL. */
		buflen -= 10;
		end -= 10;
		if (buflen < 0)
			goto Elong;
		memcpy(end, " (deleted)", 10);
	}

	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry * parent;

		/* Reached the caller-supplied root: the path is complete. */
		if (dentry == root->dentry && vfsmnt == root->mnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			spin_lock(&vfsmount_lock);
			if (vfsmnt->mnt_parent == vfsmnt) {
				spin_unlock(&vfsmount_lock);
				goto global_root;
			}
			/* Cross the mount point into the parent mount. */
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			spin_unlock(&vfsmount_lock);
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		/* Prepend "/<name>" for this component. */
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			goto Elong;
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	/*
	 * Walk ended at a mount that is its own parent without meeting
	 * @root: prepend that dentry's name, overwriting the leading slash.
	 */
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen < 0)
		goto Elong;
	retval -= namelen-1;	/* hit the slash */
	memcpy(retval, dentry->d_name.name, namelen);
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}
1832
1833 /**
1834 * d_path - return the path of a dentry
1835 * @path: path to report
1836 * @buf: buffer to return value in
1837 * @buflen: buffer length
1838 *
1839 * Convert a dentry into an ASCII path name. If the entry has been deleted
1840 * the string " (deleted)" is appended. Note that this is ambiguous.
1841 *
1842 * Returns the buffer or an error code if the path was too long.
1843 *
1844 * "buflen" should be positive. Caller holds the dcache_lock.
1845 */
1846 char *d_path(struct path *path, char *buf, int buflen)
1847 {
1848 char *res;
1849 struct path root;
1850
1851 /*
1852 * We have various synthetic filesystems that never get mounted. On
1853 * these filesystems dentries are never used for lookup purposes, and
1854 * thus don't need to be hashed. They also don't need a name until a
1855 * user wants to identify the object in /proc/pid/fd/. The little hack
1856 * below allows us to generate a name for these objects on demand:
1857 */
1858 if (path->dentry->d_op && path->dentry->d_op->d_dname)
1859 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
1860
1861 read_lock(¤t->fs->lock);
1862 root = current->fs->root;
1863 path_get(¤t->fs->root);
1864 read_unlock(¤t->fs->lock);
1865 spin_lock(&dcache_lock);
1866 res = __d_path(path->dentry, path->mnt, &root, buf, buflen);
1867 spin_unlock(&dcache_lock);
1868 path_put(&root);
1869 return res;
1870 }
1871
1872 /*
1873 * Helper function for dentry_operations.d_dname() members
1874 */
1875 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
1876 const char *fmt, ...)
1877 {
1878 va_list args;
1879 char temp[64];
1880 int sz;
1881
1882 va_start(args, fmt);
1883 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
1884 va_end(args);
1885
1886 if (sz > sizeof(temp) || sz > buflen)
1887 return ERR_PTR(-ENAMETOOLONG);
1888
1889 buffer += buflen - sz;
1890 return memcpy(buffer, temp, sz);
1891 }
1892
1893 /*
1894 * NOTE! The user-level library version returns a
1895 * character pointer. The kernel system call just
1896 * returns the length of the buffer filled (which
1897 * includes the ending '\0' character), or a negative
1898 * error value. So libc would do something like
1899 *
1900 * char *getcwd(char * buf, size_t size)
1901 * {
1902 * int retval;
1903 *
1904 * retval = sys_getcwd(buf, size);
1905 * if (retval >= 0)
1906 * return buf;
1907 * errno = -retval;
1908 * return NULL;
1909 * }
1910 */
1911 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1912 {
1913 int error;
1914 struct path pwd, root;
1915 char *page = (char *) __get_free_page(GFP_USER);
1916
1917 if (!page)
1918 return -ENOMEM;
1919
1920 read_lock(¤t->fs->lock);
1921 pwd = current->fs->pwd;
1922 path_get(¤t->fs->pwd);
1923 root = current->fs->root;
1924 path_get(¤t->fs->root);
1925 read_unlock(¤t->fs->lock);
1926
1927 error = -ENOENT;
1928 /* Has the current directory has been unlinked? */
1929 spin_lock(&dcache_lock);
1930 if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
1931 unsigned long len;
1932 char * cwd;
1933
1934 cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE);
1935 spin_unlock(&dcache_lock);
1936
1937 error = PTR_ERR(cwd);
1938 if (IS_ERR(cwd))
1939 goto out;
1940
1941 error = -ERANGE;
1942 len = PAGE_SIZE + page - cwd;
1943 if (len <= size) {
1944 error = len;
1945 if (copy_to_user(buf, cwd, len))
1946 error = -EFAULT;
1947 }
1948 } else
1949 spin_unlock(&dcache_lock);
1950
1951 out:
1952 path_put(&pwd);
1953 path_put(&root);
1954 free_page((unsigned long) page);
1955 return error;
1956 }
1957
1958 /*
1959 * Test whether new_dentry is a subdirectory of old_dentry.
1960 *
1961 * Trivially implemented using the dcache structure
1962 */
1963
1964 /**
1965 * is_subdir - is new dentry a subdirectory of old_dentry
1966 * @new_dentry: new dentry
1967 * @old_dentry: old dentry
1968 *
1969 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
1970 * Returns 0 otherwise.
1971 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
1972 */
1973
1974 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
1975 {
1976 int result;
1977 struct dentry * saved = new_dentry;
1978 unsigned long seq;
1979
1980 /* need rcu_readlock to protect against the d_parent trashing due to
1981 * d_move
1982 */
1983 rcu_read_lock();
1984 do {
1985 /* for restarting inner loop in case of seq retry */
1986 new_dentry = saved;
1987 result = 0;
1988 seq = read_seqbegin(&rename_lock);
1989 for (;;) {
1990 if (new_dentry != old_dentry) {
1991 struct dentry * parent = new_dentry->d_parent;
1992 if (parent == new_dentry)
1993 break;
1994 new_dentry = parent;
1995 continue;
1996 }
1997 result = 1;
1998 break;
1999 }
2000 } while (read_seqretry(&rename_lock, seq));
2001 rcu_read_unlock();
2002
2003 return result;
2004 }
2005
/*
 * d_genocide - drop one reference from every dentry below @root
 *
 * Walks the tree under @root depth-first, without recursion, while holding
 * dcache_lock.  Each hashed dentry with an inode below @root has its
 * d_count decremented once; @root itself is not touched.  Unhashed or
 * negative children are skipped, together with anything beneath them.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	/* Start on the first child of the directory being scanned. */
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		if (d_unhashed(dentry)||!dentry->d_inode)
			continue;
		/* Has children: descend first; it is decremented on the way up. */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	if (this_parent != root) {
		/*
		 * Finished this directory: remember its next sibling, drop
		 * its reference, and continue scanning in its parent.
		 */
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}
2035
2036 /**
2037 * find_inode_number - check for dentry with name
2038 * @dir: directory to check
2039 * @name: Name to find.
2040 *
2041 * Check whether a dentry already exists for the given name,
2042 * and return the inode number if it has an inode. Otherwise
2043 * 0 is returned.
2044 *
2045 * This routine is used to post-process directory listings for
2046 * filesystems using synthetic inode numbers, and is necessary
2047 * to keep getcwd() working.
2048 */
2049
2050 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2051 {
2052 struct dentry * dentry;
2053 ino_t ino = 0;
2054
2055 dentry = d_hash_and_lookup(dir, name);
2056 if (dentry) {
2057 if (dentry->d_inode)
2058 ino = dentry->d_inode->i_ino;
2059 dput(dentry);
2060 }
2061 return ino;
2062 }
2063
2064 static __initdata unsigned long dhash_entries;
2065 static int __init set_dhash_entries(char *str)
2066 {
2067 if (!str)
2068 return 0;
2069 dhash_entries = simple_strtoul(str, &str, 0);
2070 return 1;
2071 }
2072 __setup("dhash_entries=", set_dhash_entries);
2073
2074 static void __init dcache_init_early(void)
2075 {
2076 int loop;
2077
2078 /* If hashes are distributed across NUMA nodes, defer
2079 * hash allocation until vmalloc space is available.
2080 */
2081 if (hashdist)
2082 return;
2083
2084 dentry_hashtable =
2085 alloc_large_system_hash("Dentry cache",
2086 sizeof(struct hlist_head),
2087 dhash_entries,
2088 13,
2089 HASH_EARLY,
2090 &d_hash_shift,
2091 &d_hash_mask,
2092 0);
2093
2094 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2095 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2096 }
2097
2098 static void __init dcache_init(void)
2099 {
2100 int loop;
2101
2102 /*
2103 * A constructor could be added for stable state like the lists,
2104 * but it is probably not worth it because of the cache nature
2105 * of the dcache.
2106 */
2107 dentry_cache = KMEM_CACHE(dentry,
2108 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
2109
2110 register_shrinker(&dcache_shrinker);
2111
2112 /* Hash may have been set up in dcache_init_early */
2113 if (!hashdist)
2114 return;
2115
2116 dentry_hashtable =
2117 alloc_large_system_hash("Dentry cache",
2118 sizeof(struct hlist_head),
2119 dhash_entries,
2120 13,
2121 0,
2122 &d_hash_shift,
2123 &d_hash_mask,
2124 0);
2125
2126 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2127 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2128 }
2129
/* SLAB cache for __getname() consumers; created in vfs_caches_init() */
struct kmem_cache *names_cachep __read_mostly;

/* SLAB cache for file structures; created in vfs_caches_init() */
struct kmem_cache *filp_cachep __read_mostly;

EXPORT_SYMBOL(d_genocide);
2137
2138 void __init vfs_caches_init_early(void)
2139 {
2140 dcache_init_early();
2141 inode_init_early();
2142 }
2143
2144 void __init vfs_caches_init(unsigned long mempages)
2145 {
2146 unsigned long reserve;
2147
2148 /* Base hash sizes on available memory, with a reserve equal to
2149 150% of current kernel size */
2150
2151 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
2152 mempages -= reserve;
2153
2154 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
2155 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2156
2157 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
2158 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2159
2160 dcache_init();
2161 inode_init();
2162 files_init(mempages);
2163 mnt_init();
2164 bdev_cache_init();
2165 chrdev_init();
2166 }
2167
2168 EXPORT_SYMBOL(d_alloc);
2169 EXPORT_SYMBOL(d_alloc_anon);
2170 EXPORT_SYMBOL(d_alloc_root);
2171 EXPORT_SYMBOL(d_delete);
2172 EXPORT_SYMBOL(d_find_alias);
2173 EXPORT_SYMBOL(d_instantiate);
2174 EXPORT_SYMBOL(d_invalidate);
2175 EXPORT_SYMBOL(d_lookup);
2176 EXPORT_SYMBOL(d_move);
2177 EXPORT_SYMBOL_GPL(d_materialise_unique);
2178 EXPORT_SYMBOL(d_path);
2179 EXPORT_SYMBOL(d_prune_aliases);
2180 EXPORT_SYMBOL(d_rehash);
2181 EXPORT_SYMBOL(d_splice_alias);
2182 EXPORT_SYMBOL(d_validate);
2183 EXPORT_SYMBOL(dget_locked);
2184 EXPORT_SYMBOL(dput);
2185 EXPORT_SYMBOL(find_inode_number);
2186 EXPORT_SYMBOL(have_submounts);
2187 EXPORT_SYMBOL(names_cachep);
2188 EXPORT_SYMBOL(shrink_dcache_parent);
2189 EXPORT_SYMBOL(shrink_dcache_sb);
2190
|
This page was automatically generated by the
LXR engine.
|