Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  *  linux/fs/proc/base.c
  3  *
  4  *  Copyright (C) 1991, 1992 Linus Torvalds
  5  *
  6  *  proc base directory handling functions
  7  *
  8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
  9  *  Instead of using magical inumbers to determine the kind of object
 10  *  we allocate and fill in-core inodes upon lookup. They don't even
 11  *  go into icache. We cache the reference to task_struct upon lookup too.
 12  *  Eventually it should become a filesystem in its own. We don't use the
 13  *  rest of procfs anymore.
 14  */
 15 
 16 #include <asm/uaccess.h>
 17 
 18 #include <linux/config.h>
 19 #include <linux/errno.h>
 20 #include <linux/time.h>
 21 #include <linux/proc_fs.h>
 22 #include <linux/stat.h>
 23 #include <linux/init.h>
 24 #include <linux/file.h>
 25 #include <linux/string.h>
 26 #include <linux/seq_file.h>
 27 #include <linux/namei.h>
 28 #include <linux/namespace.h>
 29 #include <linux/mm.h>
 30 #include <linux/smp_lock.h>
 31 #include <linux/kallsyms.h>
 32 #include <linux/mount.h>
 33 #include <linux/security.h>
 34 #include <linux/ptrace.h>
 35 #include "internal.h"
 36 
 37 /*
 38  * For hysterical raisins we keep the same inumbers as in the old procfs.
 39  * Feel free to change the macro below - just keep the range distinct from
 40  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
 41  * As soon as we'll get a separate superblock we will be able to forget
 42  * about magical ranges too.
 43  */
 44 
 45 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) /* inode number: pid shifted left by 16, OR-ed with the entry type 'ino' */
 46 
 47 enum pid_directory_inos {
 48         PROC_TGID_INO = 2,
 49         PROC_TGID_TASK,
 50         PROC_TGID_STATUS,
 51         PROC_TGID_MEM,
 52         PROC_TGID_CWD,
 53         PROC_TGID_ROOT,
 54         PROC_TGID_EXE,
 55         PROC_TGID_FD,
 56         PROC_TGID_ENVIRON,
 57         PROC_TGID_AUXV,
 58         PROC_TGID_CMDLINE,
 59         PROC_TGID_STAT,
 60         PROC_TGID_STATM,
 61         PROC_TGID_MAPS,
 62         PROC_TGID_MOUNTS,
 63         PROC_TGID_WCHAN,
 64 #ifdef CONFIG_SCHEDSTATS
 65         PROC_TGID_SCHEDSTAT,
 66 #endif
 67 #ifdef CONFIG_SECURITY
 68         PROC_TGID_ATTR,
 69         PROC_TGID_ATTR_CURRENT,
 70         PROC_TGID_ATTR_PREV,
 71         PROC_TGID_ATTR_EXEC,
 72         PROC_TGID_ATTR_FSCREATE,
 73 #endif
 74 #ifdef CONFIG_AUDITSYSCALL
 75         PROC_TGID_LOGINUID,
 76 #endif
 77         PROC_TGID_FD_DIR,
 78         PROC_TGID_OOM_SCORE,
 79         PROC_TGID_OOM_ADJUST,
 80         PROC_TID_INO,
 81         PROC_TID_STATUS,
 82         PROC_TID_MEM,
 83         PROC_TID_CWD,
 84         PROC_TID_ROOT,
 85         PROC_TID_EXE,
 86         PROC_TID_FD,
 87         PROC_TID_ENVIRON,
 88         PROC_TID_AUXV,
 89         PROC_TID_CMDLINE,
 90         PROC_TID_STAT,
 91         PROC_TID_STATM,
 92         PROC_TID_MAPS,
 93         PROC_TID_MOUNTS,
 94         PROC_TID_WCHAN,
 95 #ifdef CONFIG_SCHEDSTATS
 96         PROC_TID_SCHEDSTAT,
 97 #endif
 98 #ifdef CONFIG_SECURITY
 99         PROC_TID_ATTR,
100         PROC_TID_ATTR_CURRENT,
101         PROC_TID_ATTR_PREV,
102         PROC_TID_ATTR_EXEC,
103         PROC_TID_ATTR_FSCREATE,
104 #endif
105 #ifdef CONFIG_AUDITSYSCALL
106         PROC_TID_LOGINUID,
107 #endif
108         PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
109         PROC_TID_OOM_SCORE,
110         PROC_TID_OOM_ADJUST,
111 };
112 
/*
 * One row of a per-task directory table (see the E() initializer macro
 * and the *_stuff[] tables that follow).
 */
113 struct pid_entry {
114         int type;               /* entry type, a pid_directory_inos value */
115         int len;                /* length of name, without the trailing NUL */
116         char *name;             /* entry name; NULL in the terminating row */
117         mode_t mode;            /* file type and permission bits (S_IF*, S_I*) */
118 };
119 
/*
 * Initializer for one struct pid_entry.  'name' must be a string literal:
 * the len field is computed as sizeof(name)-1.
 */
120 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
121 
/*
 * Table of the PROC_TGID_* entries: name, type and mode of each one.
 * Rows guarded by #ifdef exist only when that kernel option is enabled.
 * The all-zero row with a NULL name terminates the table.
 */
122 static struct pid_entry tgid_base_stuff[] = {
123         E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
124         E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
125         E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
126         E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
127         E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
128         E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
129         E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
130         E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
131         E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
132         E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
133         E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
134         E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
135         E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
136         E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
137 #ifdef CONFIG_SECURITY
138         E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
139 #endif
140 #ifdef CONFIG_KALLSYMS
141         E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
142 #endif
143 #ifdef CONFIG_SCHEDSTATS
144         E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
145 #endif
146         E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
147         E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
148 #ifdef CONFIG_AUDITSYSCALL
149         E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
150 #endif
151         {0,0,NULL,0}    /* terminator */
152 };
/*
 * Table of the PROC_TID_* entries.  It mirrors tgid_base_stuff[] row for
 * row except that it has no "task" entry.  The all-zero row with a NULL
 * name terminates the table.
 */
153 static struct pid_entry tid_base_stuff[] = {
154         E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
155         E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
156         E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
157         E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
158         E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
159         E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
160         E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
161         E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
162         E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
163         E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
164         E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
165         E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
166         E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
167 #ifdef CONFIG_SECURITY
168         E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
169 #endif
170 #ifdef CONFIG_KALLSYMS
171         E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
172 #endif
173 #ifdef CONFIG_SCHEDSTATS
174         E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
175 #endif
176         E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
177         E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
178 #ifdef CONFIG_AUDITSYSCALL
179         E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
180 #endif
181         {0,0,NULL,0}    /* terminator */
182 };
183 
184 #ifdef CONFIG_SECURITY
/*
 * Tables of the PROC_TGID_ATTR_* and PROC_TID_ATTR_* entries; they exist
 * only when CONFIG_SECURITY is set.  "prev" is read-only, the other three
 * entries are readable and writable by everyone as far as mode bits go.
 * Each table ends with an all-zero row whose name is NULL.
 */
185 static struct pid_entry tgid_attr_stuff[] = {
186         E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO),
187         E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO),
188         E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO),
189         E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
190         {0,0,NULL,0}    /* terminator */
191 };
192 static struct pid_entry tid_attr_stuff[] = {
193         E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO),
194         E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO),
195         E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO),
196         E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
197         {0,0,NULL,0}    /* terminator */
198 };
199 #endif
200 
201 #undef E
202 
203 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
204 {
205         struct task_struct *task = proc_task(inode);
206         struct files_struct *files;
207         struct file *file;
208         int fd = proc_type(inode) - PROC_TID_FD_DIR;
209 
210         files = get_files_struct(task);
211         if (files) {
212                 spin_lock(&files->file_lock);
213                 file = fcheck_files(files, fd);
214                 if (file) {
215                         *mnt = mntget(file->f_vfsmnt);
216                         *dentry = dget(file->f_dentry);
217                         spin_unlock(&files->file_lock);
218                         put_files_struct(files);
219                         return 0;
220                 }
221                 spin_unlock(&files->file_lock);
222                 put_files_struct(files);
223         }
224         return -ENOENT;
225 }
226 
227 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
228 {
229         struct fs_struct *fs;
230         int result = -ENOENT;
231         task_lock(proc_task(inode));
232         fs = proc_task(inode)->fs;
233         if(fs)
234                 atomic_inc(&fs->count);
235         task_unlock(proc_task(inode));
236         if (fs) {
237                 read_lock(&fs->lock);
238                 *mnt = mntget(fs->pwdmnt);
239                 *dentry = dget(fs->pwd);
240                 read_unlock(&fs->lock);
241                 result = 0;
242                 put_fs_struct(fs);
243         }
244         return result;
245 }
246 
247 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
248 {
249         struct fs_struct *fs;
250         int result = -ENOENT;
251         task_lock(proc_task(inode));
252         fs = proc_task(inode)->fs;
253         if(fs)
254                 atomic_inc(&fs->count);
255         task_unlock(proc_task(inode));
256         if (fs) {
257                 read_lock(&fs->lock);
258                 *mnt = mntget(fs->rootmnt);
259                 *dentry = dget(fs->root);
260                 read_unlock(&fs->lock);
261                 result = 0;
262                 put_fs_struct(fs);
263         }
264         return result;
265 }
266 
/*
 * True when 'task' is current itself, or when all of the following hold:
 * current is its parent, it has PT_PTRACED set, it is in TASK_STOPPED or
 * TASK_TRACED state, and security_ptrace(current, task) returns 0.
 *
 * The argument is parenthesized so any pointer expression may be passed,
 * but it is still evaluated several times: do not pass an expression
 * with side effects.
 */
#define MAY_PTRACE(task) \
	((task) == current || \
	((task)->parent == current && \
	((task)->ptrace & PT_PTRACED) && \
	 ((task)->state == TASK_STOPPED || (task)->state == TASK_TRACED) && \
	 security_ptrace(current, (task)) == 0))
273 
274 static int may_ptrace_attach(struct task_struct *task)
275 {
276         int retval = 0;
277 
278         task_lock(task);
279 
280         if (!task->mm)
281                 goto out;
282         if (((current->uid != task->euid) ||
283              (current->uid != task->suid) ||
284              (current->uid != task->uid) ||
285              (current->gid != task->egid) ||
286              (current->gid != task->sgid) ||
287              (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
288                 goto out;
289         rmb();
290         if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
291                 goto out;
292         if (security_ptrace(current, task))
293                 goto out;
294 
295         retval = 1;
296 out:
297         task_unlock(task);
298         return retval;
299 }
300 
301 static int proc_pid_environ(struct task_struct *task, char * buffer)
302 {
303         int res = 0;
304         struct mm_struct *mm = get_task_mm(task);
305         if (mm) {
306                 unsigned int len = mm->env_end - mm->env_start;
307                 if (len > PAGE_SIZE)
308                         len = PAGE_SIZE;
309                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
310                 if (!may_ptrace_attach(task))
311                         res = -ESRCH;
312                 mmput(mm);
313         }
314         return res;
315 }
316 
317 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
318 {
319         int res = 0;
320         unsigned int len;
321         struct mm_struct *mm = get_task_mm(task);
322         if (!mm)
323                 goto out;
324         if (!mm->arg_end)
325                 goto out_mm;    /* Shh! No looking before we're done */
326 
327         len = mm->arg_end - mm->arg_start;
328  
329         if (len > PAGE_SIZE)
330                 len = PAGE_SIZE;
331  
332         res = access_process_vm(task, mm->arg_start, buffer, len, 0);
333 
334         // If the nul at the end of args has been overwritten, then
335         // assume application is using setproctitle(3).
336         if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
337                 len = strnlen(buffer, res);
338                 if (len < res) {
339                     res = len;
340                 } else {
341                         len = mm->env_end - mm->env_start;
342                         if (len > PAGE_SIZE - res)
343                                 len = PAGE_SIZE - res;
344                         res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
345                         res = strnlen(buffer, res);
346                 }
347         }
348 out_mm:
349         mmput(mm);
350 out:
351         return res;
352 }
353 
354 static int proc_pid_auxv(struct task_struct *task, char *buffer)
355 {
356         int res = 0;
357         struct mm_struct *mm = get_task_mm(task);
358         if (mm) {
359                 unsigned int nwords = 0;
360                 do
361                         nwords += 2;
362                 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
363                 res = nwords * sizeof(mm->saved_auxv[0]);
364                 if (res > PAGE_SIZE)
365                         res = PAGE_SIZE;
366                 memcpy(buffer, mm->saved_auxv, res);
367                 mmput(mm);
368         }
369         return res;
370 }
371 
372 
373 #ifdef CONFIG_KALLSYMS
374 /*
375  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
376  * Returns the resolved symbol.  If that fails, simply return the address.
377  */
378 static int proc_pid_wchan(struct task_struct *task, char *buffer)
379 {
380         char *modname;
381         const char *sym_name;
382         unsigned long wchan, size, offset;
383         char namebuf[KSYM_NAME_LEN+1];
384 
385         wchan = get_wchan(task);
386 
387         sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
388         if (sym_name)
389                 return sprintf(buffer, "%s", sym_name);
390         return sprintf(buffer, "%lu", wchan);
391 }
392 #endif /* CONFIG_KALLSYMS */
393 
394 #ifdef CONFIG_SCHEDSTATS
395 /*
396  * Provides /proc/PID/schedstat
397  */
398 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
399 {
400         return sprintf(buffer, "%lu %lu %lu\n",
401                         task->sched_info.cpu_time,
402                         task->sched_info.run_delay,
403                         task->sched_info.pcnt);
404 }
405 #endif
406 
407 /* The badness from the OOM killer */
408 unsigned long badness(struct task_struct *p, unsigned long uptime);
409 static int proc_oom_score(struct task_struct *task, char *buffer)
410 {
411         unsigned long points;
412         struct timespec uptime;
413 
414         do_posix_clock_monotonic_gettime(&uptime);
415         points = badness(task, uptime.tv_sec);
416         return sprintf(buffer, "%lu\n", points);
417 }
418 
419 /************************************************************************/
420 /*                       Here the fs part begins                        */
421 /************************************************************************/
422 
423 /* permission checks */
424 
425 static int proc_check_root(struct inode *inode)
426 {
427         struct dentry *de, *base, *root;
428         struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
429         int res = 0;
430 
431         if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
432                 return -ENOENT;
433         read_lock(&current->fs->lock);
434         our_vfsmnt = mntget(current->fs->rootmnt);
435         base = dget(current->fs->root);
436         read_unlock(&current->fs->lock);
437 
438         spin_lock(&vfsmount_lock);
439         de = root;
440         mnt = vfsmnt;
441 
442         while (vfsmnt != our_vfsmnt) {
443                 if (vfsmnt == vfsmnt->mnt_parent)
444                         goto out;
445                 de = vfsmnt->mnt_mountpoint;
446                 vfsmnt = vfsmnt->mnt_parent;
447         }
448 
449         if (!is_subdir(de, base))
450                 goto out;
451         spin_unlock(&vfsmount_lock);
452 
453 exit:
454         dput(base);
455         mntput(our_vfsmnt);
456         dput(root);
457         mntput(mnt);
458         return res;
459 out:
460         spin_unlock(&vfsmount_lock);
461         res = -EACCES;
462         goto exit;
463 }
464 
465 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
466 {
467         if (generic_permission(inode, mask, NULL) != 0)
468                 return -EACCES;
469         return proc_check_root(inode);
470 }
471 
472 extern struct seq_operations proc_pid_maps_op;
473 static int maps_open(struct inode *inode, struct file *file)
474 {
475         struct task_struct *task = proc_task(inode);
476         int ret = seq_open(file, &proc_pid_maps_op);
477         if (!ret) {
478                 struct seq_file *m = file->private_data;
479                 m->private = task;
480         }
481         return ret;
482 }
483 
484 static struct file_operations proc_maps_operations = {
485         .open           = maps_open,
486         .read           = seq_read,
487         .llseek         = seq_lseek,
488         .release        = seq_release,
489 };
490 
491 extern struct seq_operations mounts_op;
492 static int mounts_open(struct inode *inode, struct file *file)
493 {
494         struct task_struct *task = proc_task(inode);
495         int ret = seq_open(file, &mounts_op);
496 
497         if (!ret) {
498                 struct seq_file *m = file->private_data;
499                 struct namespace *namespace;
500                 task_lock(task);
501                 namespace = task->namespace;
502                 if (namespace)
503                         get_namespace(namespace);
504                 task_unlock(task);
505 
506                 if (namespace)
507                         m->private = namespace;
508                 else {
509                         seq_release(inode, file);
510                         ret = -EINVAL;
511                 }
512         }
513         return ret;
514 }
515 
516 static int mounts_release(struct inode *inode, struct file *file)
517 {
518         struct seq_file *m = file->private_data;
519         struct namespace *namespace = m->private;
520         put_namespace(namespace);
521         return seq_release(inode, file);
522 }
523 
524 static struct file_operations proc_mounts_operations = {
525         .open           = mounts_open,
526         .read           = seq_read,
527         .llseek         = seq_lseek,
528         .release        = mounts_release,
529 };
530 
531 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
532 
533 static ssize_t proc_info_read(struct file * file, char __user * buf,
534                           size_t count, loff_t *ppos)
535 {
536         struct inode * inode = file->f_dentry->d_inode;
537         unsigned long page;
538         ssize_t length;
539         struct task_struct *task = proc_task(inode);
540 
541         if (count > PROC_BLOCK_SIZE)
542                 count = PROC_BLOCK_SIZE;
543         if (!(page = __get_free_page(GFP_KERNEL)))
544                 return -ENOMEM;
545 
546         length = PROC_I(inode)->op.proc_read(task, (char*)page);
547 
548         if (length >= 0)
549                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
550         free_page(page);
551         return length;
552 }
553 
/* File operations whose only method is proc_info_read(); no write, seek or open hook. */
554 static struct file_operations proc_info_file_operations = {
555         .read           = proc_info_read,
556 };
557 
558 static int mem_open(struct inode* inode, struct file* file)
559 {
560         file->private_data = (void*)((long)current->self_exec_id);
561         return 0;
562 }
563 
564 static ssize_t mem_read(struct file * file, char __user * buf,
565                         size_t count, loff_t *ppos)
566 {
567         struct task_struct *task = proc_task(file->f_dentry->d_inode);
568         char *page;
569         unsigned long src = *ppos;
570         int ret = -ESRCH;
571         struct mm_struct *mm;
572 
573         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
574                 goto out;
575 
576         ret = -ENOMEM;
577         page = (char *)__get_free_page(GFP_USER);
578         if (!page)
579                 goto out;
580 
581         ret = 0;
582  
583         mm = get_task_mm(task);
584         if (!mm)
585                 goto out_free;
586 
587         ret = -EIO;
588  
589         if (file->private_data != (void*)((long)current->self_exec_id))
590                 goto out_put;
591 
592         ret = 0;
593  
594         while (count > 0) {
595                 int this_len, retval;
596 
597                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
598                 retval = access_process_vm(task, src, page, this_len, 0);
599                 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
600                         if (!ret)
601                                 ret = -EIO;
602                         break;
603                 }
604 
605                 if (copy_to_user(buf, page, retval)) {
606                         ret = -EFAULT;
607                         break;
608                 }
609  
610                 ret += retval;
611                 src += retval;
612                 buf += retval;
613                 count -= retval;
614         }
615         *ppos = src;
616 
617 out_put:
618         mmput(mm);
619 out_free:
620         free_page((unsigned long) page);
621 out:
622         return ret;
623 }
624 
625 #define mem_write NULL
626 
627 #ifndef mem_write
628 /* This is a security hazard */
629 static ssize_t mem_write(struct file * file, const char * buf,
630                          size_t count, loff_t *ppos)
631 {
632         int copied = 0;
633         char *page;
634         struct task_struct *task = proc_task(file->f_dentry->d_inode);
635         unsigned long dst = *ppos;
636 
637         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
638                 return -ESRCH;
639 
640         page = (char *)__get_free_page(GFP_USER);
641         if (!page)
642                 return -ENOMEM;
643 
644         while (count > 0) {
645                 int this_len, retval;
646 
647                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
648                 if (copy_from_user(page, buf, this_len)) {
649                         copied = -EFAULT;
650                         break;
651                 }
652                 retval = access_process_vm(task, dst, page, this_len, 1);
653                 if (!retval) {
654                         if (!copied)
655                                 copied = -EIO;
656                         break;
657                 }
658                 copied += retval;
659                 buf += retval;
660                 dst += retval;
661                 count -= retval;                        
662         }
663         *ppos = dst;
664         free_page((unsigned long) page);
665         return copied;
666 }
667 #endif
668 
669 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
670 {
671         switch (orig) {
672         case 0:
673                 file->f_pos = offset;
674                 break;
675         case 1:
676                 file->f_pos += offset;
677                 break;
678         default:
679                 return -EINVAL;
680         }
681         force_successful_syscall_return();
682         return file->f_pos;
683 }
684 
685 static struct file_operations proc_mem_operations = {
686         .llseek         = mem_lseek,
687         .read           = mem_read,
688         .write          = mem_write,
689         .open           = mem_open,
690 };
691 
692 static ssize_t oom_adjust_read(struct file *file, char *buf,
693                                 size_t count, loff_t *ppos)
694 {
695         struct task_struct *task = proc_task(file->f_dentry->d_inode);
696         char buffer[8];
697         size_t len;
698         int oom_adjust = task->oomkilladj;
699         loff_t __ppos = *ppos;
700 
701         len = sprintf(buffer, "%i\n", oom_adjust);
702         if (__ppos >= len)
703                 return 0;
704         if (count > len-__ppos)
705                 count = len-__ppos;
706         if (copy_to_user(buf, buffer + __ppos, count))
707                 return -EFAULT;
708         *ppos = __ppos + count;
709         return count;
710 }
711 
712 static ssize_t oom_adjust_write(struct file *file, const char *buf,
713                                 size_t count, loff_t *ppos)
714 {
715         struct task_struct *task = proc_task(file->f_dentry->d_inode);
716         char buffer[8], *end;
717         int oom_adjust;
718 
719         if (!capable(CAP_SYS_RESOURCE))
720                 return -EPERM;
721         memset(buffer, 0, 8);
722         if (count > 6)
723                 count = 6;
724         if (copy_from_user(buffer, buf, count))
725                 return -EFAULT;
726         oom_adjust = simple_strtol(buffer, &end, 0);
727         if (oom_adjust < -16 || oom_adjust > 15)
728                 return -EINVAL;
729         if (*end == '\n')
730                 end++;
731         task->oomkilladj = oom_adjust;
732         if (end - buffer == 0)
733                 return -EIO;
734         return end - buffer;
735 }
736 
737 static struct file_operations proc_oom_adjust_operations = {
738         read:           oom_adjust_read,
739         write:          oom_adjust_write,
740 };
741 
/* Inode operations that only install proc_permission() as the permission check. */
742 static struct inode_operations proc_mem_inode_operations = {
743         .permission     = proc_permission,
744 };
745 
746 #ifdef CONFIG_AUDITSYSCALL
747 #define TMPBUFLEN 21
748 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
749                                   size_t count, loff_t *ppos)
750 {
751         struct inode * inode = file->f_dentry->d_inode;
752         struct task_struct *task = proc_task(inode);
753         ssize_t length;
754         char tmpbuf[TMPBUFLEN];
755 
756         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
757                                 audit_get_loginuid(task->audit_context));
758         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
759 }
760 
761 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
762                                    size_t count, loff_t *ppos)
763 {
764         struct inode * inode = file->f_dentry->d_inode;
765         char *page, *tmp;
766         ssize_t length;
767         struct task_struct *task = proc_task(inode);
768         uid_t loginuid;
769 
770         if (!capable(CAP_AUDIT_CONTROL))
771                 return -EPERM;
772 
773         if (current != task)
774                 return -EPERM;
775 
776         if (count > PAGE_SIZE)
777                 count = PAGE_SIZE;
778 
779         if (*ppos != 0) {
780                 /* No partial writes. */
781                 return -EINVAL;
782         }
783         page = (char*)__get_free_page(GFP_USER);
784         if (!page)
785                 return -ENOMEM;
786         length = -EFAULT;
787         if (copy_from_user(page, buf, count))
788                 goto out_free_page;
789 
790         loginuid = simple_strtoul(page, &tmp, 10);
791         if (tmp == page) {
792                 length = -EINVAL;
793                 goto out_free_page;
794 
795         }
796         length = audit_set_loginuid(task->audit_context, loginuid);
797         if (likely(length == 0))
798                 length = count;
799 
800 out_free_page:
801         free_page((unsigned long) page);
802         return length;
803 }
804 
805 static struct file_operations proc_loginuid_operations = {
806         .read           = proc_loginuid_read,
807         .write          = proc_loginuid_write,
808 };
809 #endif
810 
811 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
812 {
813         struct inode *inode = dentry->d_inode;
814         int error = -EACCES;
815 
816         /* We don't need a base pointer in the /proc filesystem */
817         path_release(nd);
818 
819         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
820                 goto out;
821         error = proc_check_root(inode);
822         if (error)
823                 goto out;
824 
825         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
826         nd->last_type = LAST_BIND;
827 out:
828         return error;
829 }
830 
831 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
832                             char __user *buffer, int buflen)
833 {
834         struct inode * inode;
835         char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
836         int len;
837 
838         if (!tmp)
839                 return -ENOMEM;
840                 
841         inode = dentry->d_inode;
842         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
843         len = PTR_ERR(path);
844         if (IS_ERR(path))
845                 goto out;
846         len = tmp + PAGE_SIZE - 1 - path;
847 
848         if (len > buflen)
849                 len = buflen;
850         if (copy_to_user(buffer, path, len))
851                 len = -EFAULT;
852  out:
853         free_page((unsigned long)tmp);
854         return len;
855 }
856 
857 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
858 {
859         int error = -EACCES;
860         struct inode *inode = dentry->d_inode;
861         struct dentry *de;
862         struct vfsmount *mnt = NULL;
863 
864         lock_kernel();
865 
866         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
867                 goto out;
868         error = proc_check_root(inode);
869         if (error)
870                 goto out;
871 
872         error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
873         if (error)
874                 goto out;
875 
876         error = do_proc_readlink(de, mnt, buffer, buflen);
877         dput(de);
878         mntput(mnt);
879 out:
880         unlock_kernel();
881         return error;
882 }
883 
884 static struct inode_operations proc_pid_link_inode_operations = {
885         .readlink       = proc_pid_readlink,
886         .follow_link    = proc_pid_follow_link
887 };
888 
889 #define NUMBUF 10 /* size of the scratch buffer proc_readfd() fills with a descriptor number's decimal digits */
890 
/*
 * proc_readfd - readdir handler for a task's fd directory.
 *
 * f_pos 0 and 1 produce "." and ".."; every later position maps to file
 * descriptor number f_pos - 2.  Each descriptor that fcheck_files() finds
 * is reported as a DT_LNK entry named after the descriptor number in
 * decimal.
 *
 * files->file_lock is held while scanning the descriptor table and is
 * dropped around each filldir() call, then re-taken before continuing.
 *
 * Returns -ENOENT if the task is no longer alive and 0 otherwise (also when
 * filldir() refuses an entry or the task has no files_struct).
 */
891 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
892 {
893         struct inode *inode = filp->f_dentry->d_inode;
894         struct task_struct *p = proc_task(inode);
895         unsigned int fd, tid, ino;
896         int retval;
897         char buf[NUMBUF];
898         struct files_struct * files;
899 
900         retval = -ENOENT;
901         if (!pid_alive(p))
902                 goto out;
903         retval = 0;
904         tid = p->pid;
905 
906         fd = filp->f_pos;
907         switch (fd) {
908                 case 0:
909                         if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
910                                 goto out;
911                         filp->f_pos++;
                            /* fall through */
912                 case 1:
913                         ino = fake_ino(tid, PROC_TID_INO);
914                         if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
915                                 goto out;
916                         filp->f_pos++;
                            /* fall through */
917                 default:
918                         files = get_files_struct(p);
919                         if (!files)
920                                 goto out;
921                         spin_lock(&files->file_lock);
922                         for (fd = filp->f_pos-2;
923                              fd < files->max_fds;
924                              fd++, filp->f_pos++) {
925                                 unsigned int i,j;
926 
927                                 if (!fcheck_files(files, fd))
928                                         continue;
                                    /* drop the lock across the filldir() callback */
929                                 spin_unlock(&files->file_lock);
930 
                                    /* format fd in decimal, right-aligned in buf[] */
931                                 j = NUMBUF;
932                                 i = fd;
933                                 do {
934                                         j--;
935                                         buf[j] = '0' + (i % 10);
936                                         i /= 10;
937                                 } while (i);
938 
939                                 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
940                                 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
                                            /* re-take the lock so the unlock after the loop balances */
941                                         spin_lock(&files->file_lock);
942                                         break;
943                                 }
944                                 spin_lock(&files->file_lock);
945                         }
946                         spin_unlock(&files->file_lock);
947                         put_files_struct(files);
948         }
949 out:
950         return retval;
951 }
952 
/*
 * proc_pident_readdir - common readdir for directories described by a
 * pid_entry table.
 * @ents:  table of entries; iteration stops at the entry whose name is NULL
 * @nents: number of slots in @ents, used to bound the starting index
 *
 * f_pos 0 and 1 emit "." and ".."; position n >= 2 corresponds to
 * ents[n - 2].  Each entry is emitted with inode number
 * fake_ino(pid, p->type) and d_type p->mode >> 12.
 *
 * Returns -ENOENT if the task is not alive, 0 if filldir() refused an
 * entry, and 1 once the table is exhausted or the starting position lies
 * at or beyond @nents.
 */
953 static int proc_pident_readdir(struct file *filp,
954                 void *dirent, filldir_t filldir,
955                 struct pid_entry *ents, unsigned int nents)
956 {
957         int i;
958         int pid;
959         struct dentry *dentry = filp->f_dentry;
960         struct inode *inode = dentry->d_inode;
961         struct pid_entry *p;
962         ino_t ino;
963         int ret;
964 
965         ret = -ENOENT;
966         if (!pid_alive(proc_task(inode)))
967                 goto out;
968 
969         ret = 0;
970         pid = proc_task(inode)->pid;
971         i = filp->f_pos;
972         switch (i) {
973         case 0:
974                 ino = inode->i_ino;
975                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
976                         goto out;
977                 i++;
978                 filp->f_pos++;
979                 /* fall through */
980         case 1:
981                 ino = parent_ino(dentry);
982                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
983                         goto out;
984                 i++;
985                 filp->f_pos++;
986                 /* fall through */
987         default:
                    /* convert the directory position into a table index */
988                 i -= 2;
989                 if (i >= nents) {
990                         ret = 1;
991                         goto out;
992                 }
993                 p = ents + i;
994                 while (p->name) {
995                         if (filldir(dirent, p->name, p->len, filp->f_pos,
996                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
997                                 goto out;
998                         filp->f_pos++;
999                         p++;
1000                 }
1001         }
1002 
1003         ret = 1;
1004 out:
1005         return ret;
1006 }
1007 
/* readdir for a tgid base directory: lists the tgid_base_stuff table. */
1008 static int proc_tgid_base_readdir(struct file * filp,
1009                              void * dirent, filldir_t filldir)
1010 {
1011         return proc_pident_readdir(filp,dirent,filldir,
1012                                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1013 }
1014 
/* readdir for a tid base directory: lists the tid_base_stuff table. */
1015 static int proc_tid_base_readdir(struct file * filp,
1016                              void * dirent, filldir_t filldir)
1017 {
1018         return proc_pident_readdir(filp,dirent,filldir,
1019                                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
1020 }
1021 
1022 /* building an inode */
1023 
/*
 * task_dumpable - read the dumpable flag of a task's mm.
 *
 * task->mm is sampled under task_lock().  Returns mm->dumpable, or 0 when
 * the task has no mm.
 */
1024 static int task_dumpable(struct task_struct *task)
1025 {
1026         int dumpable = 0;
1027         struct mm_struct *mm;
1028 
1029         task_lock(task);
1030         mm = task->mm;
1031         if (mm)
1032                 dumpable = mm->dumpable;
1033         task_unlock(task);
1034         return dumpable;
1035 }
1036 
1037 
/*
 * proc_pid_make_inode - allocate and initialise an inode for a per-task
 * proc entry.
 * @sb:   superblock to allocate the inode from
 * @task: task the inode describes
 * @ino:  entry type; stored in ei->type and folded into the inode number
 *
 * On success the inode holds its own reference on @task (ei->task).
 * Ownership is root unless the entry is a tgid/tid base directory or the
 * task is dumpable, in which case the task's euid/egid are used.
 *
 * Returns the inode, or NULL if new_inode() fails or the task is no longer
 * alive.
 */
1038 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
1039 {
1040         struct inode * inode;
1041         struct proc_inode *ei;
1042 
1043         /* We need a new inode */
1044         
1045         inode = new_inode(sb);
1046         if (!inode)
1047                 goto out;
1048 
1049         /* Common stuff */
1050         ei = PROC_I(inode);
1051         ei->task = NULL;
1052         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1053         inode->i_ino = fake_ino(task->pid, ino);
1054 
1055         if (!pid_alive(task))
1056                 goto out_unlock;
1057 
1058         /*
1059          * grab the reference to task.
1060          */
1061         get_task_struct(task);
1062         ei->task = task;
1063         ei->type = ino;
1064         inode->i_uid = 0;
1065         inode->i_gid = 0;
1066         if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
1067                 inode->i_uid = task->euid;
1068                 inode->i_gid = task->egid;
1069         }
1070         security_task_to_inode(task, inode);
1071 
1072 out:
1073         return inode;
1074 
      /* despite the label name, no lock is held here; this only discards the inode */
1075 out_unlock:
1076         ei->pde = NULL;
1077         iput(inode);
1078         return NULL;
1079 }
1080 
1081 /* dentry stuff */
1082 
1083 /*
1084  *      Exceptional case: normally we are not allowed to unhash a busy
1085  * directory. In this case, however, we can do it - no aliasing problems
1086  * due to the way we treat inodes.
1087  *
1088  * Rewrite the inode's ownerships here because the owning task may have
1089  * performed a setuid(), etc.
      *
      * Returns 1 (dentry still valid) while the task is alive; otherwise the
      * dentry is dropped from the hash and 0 is returned.
1090  */
1091 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1092 {
1093         struct inode *inode = dentry->d_inode;
1094         struct task_struct *task = proc_task(inode);
1095         if (pid_alive(task)) {
                     /* same ownership rule as proc_pid_make_inode() */
1096                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
1097                         inode->i_uid = task->euid;
1098                         inode->i_gid = task->egid;
1099                 } else {
1100                         inode->i_uid = 0;
1101                         inode->i_gid = 0;
1102                 }
1103                 security_task_to_inode(task, inode);
1104                 return 1;
1105         }
1106         d_drop(dentry);
1107         return 0;
1108 }
1109 
/*
 * tid_fd_revalidate - revalidate a dentry for one fd entry of a task.
 *
 * The descriptor number is recovered from the inode type
 * (type - PROC_TID_FD_DIR).  If the task still has a files_struct and
 * fcheck_files() finds that descriptor, ownership is refreshed (task
 * euid/egid when dumpable, root otherwise) and 1 is returned.  Otherwise
 * the dentry is dropped and 0 is returned.
 */
1110 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1111 {
1112         struct inode *inode = dentry->d_inode;
1113         struct task_struct *task = proc_task(inode);
1114         int fd = proc_type(inode) - PROC_TID_FD_DIR;
1115         struct files_struct *files;
1116 
1117         files = get_files_struct(task);
1118         if (files) {
1119                 spin_lock(&files->file_lock);
1120                 if (fcheck_files(files, fd)) {
                             /* descriptor exists: release files before touching the inode */
1121                         spin_unlock(&files->file_lock);
1122                         put_files_struct(files);
1123                         if (task_dumpable(task)) {
1124                                 inode->i_uid = task->euid;
1125                                 inode->i_gid = task->egid;
1126                         } else {
1127                                 inode->i_uid = 0;
1128                                 inode->i_gid = 0;
1129                         }
1130                         security_task_to_inode(task, inode);
1131                         return 1;
1132                 }
1133                 spin_unlock(&files->file_lock);
1134                 put_files_struct(files);
1135         }
1136         d_drop(dentry);
1137         return 0;
1138 }
1139 
/*
 * pid_base_iput - d_iput hook for base directory dentries.
 *
 * Under task->proc_lock, clears task->proc_dentry if it still points at
 * this dentry, then releases the inode with iput().
 */
1140 static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1141 {
1142         struct task_struct *task = proc_task(inode);
1143         spin_lock(&task->proc_lock);
1144         if (task->proc_dentry == dentry)
1145                 task->proc_dentry = NULL;
1146         spin_unlock(&task->proc_lock);
1147         iput(inode);
1148 }
1149 
/*
 * pid_delete_dentry - d_delete hook shared by the per-task dentry tables.
 *
 * Returns non-zero when the task behind the dentry's inode is no longer
 * alive, zero otherwise.
 */
1150 static int pid_delete_dentry(struct dentry * dentry)
1151 {
1152         /* Is the task we represent dead?
1153          * If so, then don't put the dentry on the lru list,
1154          * kill it immediately.
1155          */
1156         return !pid_alive(proc_task(dentry->d_inode));
1157 }
1158 
/* Dentry operations installed by proc_lookupfd() on fd entries. */
1159 static struct dentry_operations tid_fd_dentry_operations =
1160 {
1161         .d_revalidate   = tid_fd_revalidate,
1162         .d_delete       = pid_delete_dentry,
1163 };
1164 
/* Dentry operations installed by proc_pident_lookup() on table entries. */
1165 static struct dentry_operations pid_dentry_operations =
1166 {
1167         .d_revalidate   = pid_revalidate,
1168         .d_delete       = pid_delete_dentry,
1169 };
1170 
/*
 * Dentry operations for the tgid and tid base directories.  Adds the
 * d_iput hook that clears task->proc_dentry.
 */
1171 static struct dentry_operations pid_base_dentry_operations =
1172 {
1173         .d_revalidate   = pid_revalidate,
1174         .d_iput         = pid_base_iput,
1175         .d_delete       = pid_delete_dentry,
1176 };
1177 
1178 /* Lookups */
1179 
/*
 * name_to_int - parse a dentry name as an unsigned decimal number.
 *
 * Rejected inputs, all reported as ~0U:
 *  - a name longer than one character that starts with '0'
 *  - any character outside '0'..'9'
 *  - a value that has already reached (~0U-9)/10 before the next digit is
 *    appended (guards the multiply-and-add against wrap-around)
 *
 * A zero-length name never enters the loop and yields 0.
 */
1180 static unsigned name_to_int(struct dentry *dentry)
1181 {
1182         const char *name = dentry->d_name.name;
1183         int len = dentry->d_name.len;
1184         unsigned n = 0;
1185 
1186         if (len > 1 && *name == '0')
1187                 goto out;
1188         while (len-- > 0) {
                     /* unsigned subtraction: anything below '0' wraps to a value > 9 */
1189                 unsigned c = *name++ - '0';
1190                 if (c > 9)
1191                         goto out;
1192                 if (n >= (~0U-9)/10)
1193                         goto out;
1194                 n *= 10;
1195                 n += c;
1196         }
1197         return n;
1198 out:
1199         return ~0U;
1200 }
1201 
1202 /* SMP-safe */
/*
 * proc_lookupfd - lookup of a numeric name inside a task's fd directory.
 *
 * The name is parsed with name_to_int().  A symlink inode of type
 * PROC_TID_FD_DIR + fd is created, and its permission bits are derived
 * from the open file's f_mode while files->file_lock is held.
 *
 * Returns NULL after d_add() on success, or ERR_PTR(-ENOENT) if the name
 * is not a valid number, the task is dead, the inode cannot be made, the
 * task has no files_struct, or the descriptor is not open.
 */
1203 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1204 {
1205         struct task_struct *task = proc_task(dir);
1206         unsigned fd = name_to_int(dentry);
1207         struct file * file;
1208         struct files_struct * files;
1209         struct inode *inode;
1210         struct proc_inode *ei;
1211 
1212         if (fd == ~0U)
1213                 goto out;
1214         if (!pid_alive(task))
1215                 goto out;
1216 
1217         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1218         if (!inode)
1219                 goto out;
1220         ei = PROC_I(inode);
1221         files = get_files_struct(task);
1222         if (!files)
1223                 goto out_unlock;
1224         inode->i_mode = S_IFLNK;
1225         spin_lock(&files->file_lock);
1226         file = fcheck_files(files, fd);
1227         if (!file)
1228                 goto out_unlock2;
             /* NOTE(review): bits 1 and 2 presumably are FMODE_READ / FMODE_WRITE - confirm */
1229         if (file->f_mode & 1)
1230                 inode->i_mode |= S_IRUSR | S_IXUSR;
1231         if (file->f_mode & 2)
1232                 inode->i_mode |= S_IWUSR | S_IXUSR;
1233         spin_unlock(&files->file_lock);
1234         put_files_struct(files);
1235         inode->i_op = &proc_pid_link_inode_operations;
1236         inode->i_size = 64;
1237         ei->op.proc_get_link = proc_fd_link;
1238         dentry->d_op = &tid_fd_dentry_operations;
1239         d_add(dentry, inode);
1240         return NULL;
1241 
      /* error unwinding: release in reverse order of acquisition */
1242 out_unlock2:
1243         spin_unlock(&files->file_lock);
1244         put_files_struct(files);
1245 out_unlock:
1246         iput(inode);
1247 out:
1248         return ERR_PTR(-ENOENT);
1249 }
1250 
/* Forward declarations needed by the operation tables that follow. */
1251 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1252 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1253 
/* File operations for a task's fd directory. */
1254 static struct file_operations proc_fd_operations = {
1255         .read           = generic_read_dir,
1256         .readdir        = proc_readfd,
1257 };
1258 
/* File operations for a thread group's task directory. */
1259 static struct file_operations proc_task_operations = {
1260         .read           = generic_read_dir,
1261         .readdir        = proc_task_readdir,
1262 };
1263 
1264 /*
1265  * proc directories can do almost nothing..
      *
      * Inode operations for a task's fd directory: only lookup and
      * permission are provided.
1266  */
1267 static struct inode_operations proc_fd_inode_operations = {
1268         .lookup         = proc_lookupfd,
1269         .permission     = proc_permission,
1270 };
1271 
/* Inode operations for a thread group's task directory. */
1272 static struct inode_operations proc_task_inode_operations = {
1273         .lookup         = proc_task_lookup,
1274         .permission     = proc_permission,
1275 };
1276 
1277 #ifdef CONFIG_SECURITY
/*
 * proc_pid_attr_read - read handler for the security attribute files.
 *
 * The attribute is selected by the dentry name and fetched into a freshly
 * allocated page via security_getprocattr().  A non-negative result is
 * used as the data length for simple_read_from_buffer().  At most
 * PAGE_SIZE bytes are requested.
 *
 * Returns the value from simple_read_from_buffer(), a negative value from
 * security_getprocattr(), or -ENOMEM if the page cannot be allocated.
 */
1278 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1279                                   size_t count, loff_t *ppos)
1280 {
1281         struct inode * inode = file->f_dentry->d_inode;
1282         unsigned long page;
1283         ssize_t length;
1284         struct task_struct *task = proc_task(inode);
1285 
1286         if (count > PAGE_SIZE)
1287                 count = PAGE_SIZE;
1288         if (!(page = __get_free_page(GFP_KERNEL)))
1289                 return -ENOMEM;
1290 
1291         length = security_getprocattr(task, 
1292                                       (char*)file->f_dentry->d_name.name, 
1293                                       (void*)page, count);
1294         if (length >= 0)
1295                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1296         free_page(page);
1297         return length;
1298 }
1299 
/*
 * proc_pid_attr_write - write handler for the security attribute files.
 *
 * The write is clamped to PAGE_SIZE and must start at offset 0.  The user
 * data is copied into a temporary page and handed to
 * security_setprocattr() together with the dentry name.
 *
 * Returns the result of security_setprocattr(), -EINVAL for a non-zero
 * offset, -ENOMEM if no page is available, or -EFAULT if the user copy
 * fails.
 */
1300 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1301                                    size_t count, loff_t *ppos)
1302 { 
1303         struct inode * inode = file->f_dentry->d_inode;
1304         char *page; 
1305         ssize_t length; 
1306         struct task_struct *task = proc_task(inode); 
1307 
1308         if (count > PAGE_SIZE) 
1309                 count = PAGE_SIZE; 
1310         if (*ppos != 0) {
1311                 /* No partial writes. */
1312                 return -EINVAL;
1313         }
1314         page = (char*)__get_free_page(GFP_USER); 
1315         if (!page) 
1316                 return -ENOMEM;
             /* preset the error reported if the copy below fails */
1317         length = -EFAULT; 
1318         if (copy_from_user(page, buf, count)) 
1319                 goto out;
1320 
1321         length = security_setprocattr(task, 
1322                                       (char*)file->f_dentry->d_name.name, 
1323                                       (void*)page, count);
1324 out:
1325         free_page((unsigned long) page);
1326         return length;
1327 } 
1328 
/* File operations for the individual security attribute files. */
1329 static struct file_operations proc_pid_attr_operations = {
1330         .read           = proc_pid_attr_read,
1331         .write          = proc_pid_attr_write,
1332 };
1333 
/*
 * Tentative definitions so proc_pident_lookup() can reference the attr
 * directory tables; the initialised definitions appear further down.
 */
1334 static struct file_operations proc_tid_attr_operations;
1335 static struct inode_operations proc_tid_attr_inode_operations;
1336 static struct file_operations proc_tgid_attr_operations;
1337 static struct inode_operations proc_tgid_attr_inode_operations;
1338 #endif
1339 
1340 /* SMP-safe */
/*
 * proc_pident_lookup - common lookup for directories described by a
 * pid_entry table.
 * @dir:    directory inode being searched
 * @dentry: dentry carrying the name to look up
 * @ents:   table to search; terminated by an entry whose name is NULL
 *
 * The name is matched by length and memcmp() against @ents.  A new inode
 * is created with the matching entry's type and mode, and the switch
 * below wires up the inode/file operations or the proc_read / proc_get_link
 * callback for that type.
 *
 * Returns NULL after d_add() on success.  Returns ERR_PTR(-ENOENT) if the
 * task is dead or the name is not in the table, and ERR_PTR(-EINVAL) if
 * the inode cannot be created or the type is not handled by the switch.
 */
1341 static struct dentry *proc_pident_lookup(struct inode *dir, 
1342                                          struct dentry *dentry,
1343                                          struct pid_entry *ents)
1344 {
1345         struct inode *inode;
1346         int error;
1347         struct task_struct *task = proc_task(dir);
1348         struct pid_entry *p;
1349         struct proc_inode *ei;
1350 
1351         error = -ENOENT;
1352         inode = NULL;
1353 
1354         if (!pid_alive(task))
1355                 goto out;
1356 
1357         for (p = ents; p->name; p++) {
1358                 if (p->len != dentry->d_name.len)
1359                         continue;
1360                 if (!memcmp(dentry->d_name.name, p->name, p->len))
1361                         break;
1362         }
             /* reached the terminator: no entry with that name */
1363         if (!p->name)
1364                 goto out;
1365 
1366         error = -EINVAL;
1367         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1368         if (!inode)
1369                 goto out;
1370 
1371         ei = PROC_I(inode);
1372         inode->i_mode = p->mode;
1373         /*
1374          * Yes, it does not scale. And it should not. Don't add
1375          * new entries into /proc/<tgid>/ without very good reasons.
1376          */
1377         switch(p->type) {
1378                 case PROC_TGID_TASK:
1379                         inode->i_nlink = 3;
1380                         inode->i_op = &proc_task_inode_operations;
1381                         inode->i_fop = &proc_task_operations;
1382                         break;
1383                 case PROC_TID_FD:
1384                 case PROC_TGID_FD:
1385                         inode->i_nlink = 2;
1386                         inode->i_op = &proc_fd_inode_operations;
1387                         inode->i_fop = &proc_fd_operations;
1388                         break;
1389                 case PROC_TID_EXE:
1390                 case PROC_TGID_EXE:
1391                         inode->i_op = &proc_pid_link_inode_operations;
1392                         ei->op.proc_get_link = proc_exe_link;
1393                         break;
1394                 case PROC_TID_CWD:
1395                 case PROC_TGID_CWD:
1396                         inode->i_op = &proc_pid_link_inode_operations;
1397                         ei->op.proc_get_link = proc_cwd_link;
1398                         break;
1399                 case PROC_TID_ROOT:
1400                 case PROC_TGID_ROOT:
1401                         inode->i_op = &proc_pid_link_inode_operations;
1402                         ei->op.proc_get_link = proc_root_link;
1403                         break;
1404                 case PROC_TID_ENVIRON:
1405                 case PROC_TGID_ENVIRON:
1406                         inode->i_fop = &proc_info_file_operations;
1407                         ei->op.proc_read = proc_pid_environ;
1408                         break;
1409                 case PROC_TID_AUXV:
1410                 case PROC_TGID_AUXV:
1411                         inode->i_fop = &proc_info_file_operations;
1412                         ei->op.proc_read = proc_pid_auxv;
1413                         break;
1414                 case PROC_TID_STATUS:
1415                 case PROC_TGID_STATUS:
1416                         inode->i_fop = &proc_info_file_operations;
1417                         ei->op.proc_read = proc_pid_status;
1418                         break;
1419                 case PROC_TID_STAT:
1420                         inode->i_fop = &proc_info_file_operations;
1421                         ei->op.proc_read = proc_tid_stat;
1422                         break;
1423                 case PROC_TGID_STAT:
1424                         inode->i_fop = &proc_info_file_operations;
1425                         ei->op.proc_read = proc_tgid_stat;
1426                         break;
1427                 case PROC_TID_CMDLINE:
1428                 case PROC_TGID_CMDLINE:
1429                         inode->i_fop = &proc_info_file_operations;
1430                         ei->op.proc_read = proc_pid_cmdline;
1431                         break;
1432                 case PROC_TID_STATM:
1433                 case PROC_TGID_STATM:
1434                         inode->i_fop = &proc_info_file_operations;
1435                         ei->op.proc_read = proc_pid_statm;
1436                         break;
1437                 case PROC_TID_MAPS:
1438                 case PROC_TGID_MAPS:
1439                         inode->i_fop = &proc_maps_operations;
1440                         break;
1441                 case PROC_TID_MEM:
1442                 case PROC_TGID_MEM:
1443                         inode->i_op = &proc_mem_inode_operations;
1444                         inode->i_fop = &proc_mem_operations;
1445                         break;
1446                 case PROC_TID_MOUNTS:
1447                 case PROC_TGID_MOUNTS:
1448                         inode->i_fop = &proc_mounts_operations;
1449                         break;
1450 #ifdef CONFIG_SECURITY
1451                 case PROC_TID_ATTR:
1452                         inode->i_nlink = 2;
1453                         inode->i_op = &proc_tid_attr_inode_operations;
1454                         inode->i_fop = &proc_tid_attr_operations;
1455                         break;
1456                 case PROC_TGID_ATTR:
1457                         inode->i_nlink = 2;
1458                         inode->i_op = &proc_tgid_attr_inode_operations;
1459                         inode->i_fop = &proc_tgid_attr_operations;
1460                         break;
1461                 case PROC_TID_ATTR_CURRENT:
1462                 case PROC_TGID_ATTR_CURRENT:
1463                 case PROC_TID_ATTR_PREV:
1464                 case PROC_TGID_ATTR_PREV:
1465                 case PROC_TID_ATTR_EXEC:
1466                 case PROC_TGID_ATTR_EXEC:
1467                 case PROC_TID_ATTR_FSCREATE:
1468                 case PROC_TGID_ATTR_FSCREATE:
1469                         inode->i_fop = &proc_pid_attr_operations;
1470                         break;
1471 #endif
1472 #ifdef CONFIG_KALLSYMS
1473                 case PROC_TID_WCHAN:
1474                 case PROC_TGID_WCHAN:
1475                         inode->i_fop = &proc_info_file_operations;
1476                         ei->op.proc_read = proc_pid_wchan;
1477                         break;
1478 #endif
1479 #ifdef CONFIG_SCHEDSTATS
1480                 case PROC_TID_SCHEDSTAT:
1481                 case PROC_TGID_SCHEDSTAT:
1482                         inode->i_fop = &proc_info_file_operations;
1483                         ei->op.proc_read = proc_pid_schedstat;
1484                         break;
1485 #endif
1486                 case PROC_TID_OOM_SCORE:
1487                 case PROC_TGID_OOM_SCORE:
1488                         inode->i_fop = &proc_info_file_operations;
1489                         ei->op.proc_read = proc_oom_score;
1490                         break;
1491                 case PROC_TID_OOM_ADJUST:
1492                 case PROC_TGID_OOM_ADJUST:
1493                         inode->i_fop = &proc_oom_adjust_operations;
1494                         break;
1495 #ifdef CONFIG_AUDITSYSCALL
1496                 case PROC_TID_LOGINUID:
1497                 case PROC_TGID_LOGINUID:
1498                         inode->i_fop = &proc_loginuid_operations;
1499                         break;
1500 #endif
1501                 default:
                             /* a table entry with no case above: log it and discard the inode */
1502                         printk(KERN_ERR "procfs: impossible type (%d)\n",p->type);
1503                         iput(inode);
1504                         return ERR_PTR(-EINVAL);
1505         }
1506         dentry->d_op = &pid_dentry_operations;
1507         d_add(dentry, inode);
1508         return NULL;
1509 
1510 out:
1511         return ERR_PTR(error);
1512 }
1513 
/* Lookup in a tgid base directory: searches the tgid_base_stuff table. */
1514 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1515         return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1516 }
1517 
/* Lookup in a tid base directory: searches the tid_base_stuff table. */
1518 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1519         return proc_pident_lookup(dir, dentry, tid_base_stuff);
1520 }
1521 
/* File operations for a tgid base directory (installed by proc_pid_lookup()). */
1522 static struct file_operations proc_tgid_base_operations = {
1523         .read           = generic_read_dir,
1524         .readdir        = proc_tgid_base_readdir,
1525 };
1526 
/* File operations for a tid base directory (installed by proc_task_lookup()). */
1527 static struct file_operations proc_tid_base_operations = {
1528         .read           = generic_read_dir,
1529         .readdir        = proc_tid_base_readdir,
1530 };
1531 
/* Inode operations for a tgid base directory: lookup only. */
1532 static struct inode_operations proc_tgid_base_inode_operations = {
1533         .lookup         = proc_tgid_base_lookup,
1534 };
1535 
/* Inode operations for a tid base directory: lookup only. */
1536 static struct inode_operations proc_tid_base_inode_operations = {
1537         .lookup         = proc_tid_base_lookup,
1538 };
1539 
1540 #ifdef CONFIG_SECURITY
/* readdir for a tgid attr directory: lists the tgid_attr_stuff table. */
1541 static int proc_tgid_attr_readdir(struct file * filp,
1542                              void * dirent, filldir_t filldir)
1543 {
1544         return proc_pident_readdir(filp,dirent,filldir,
1545                                    tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1546 }
1547 
/* readdir for a tid attr directory: lists the tid_attr_stuff table. */
1548 static int proc_tid_attr_readdir(struct file * filp,
1549                              void * dirent, filldir_t filldir)
1550 {
1551         return proc_pident_readdir(filp,dirent,filldir,
1552                                    tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
1553 }
1554 
/* File operations for a tgid attr directory. */
1555 static struct file_operations proc_tgid_attr_operations = {
1556         .read           = generic_read_dir,
1557         .readdir        = proc_tgid_attr_readdir,
1558 };
1559 
/* File operations for a tid attr directory. */
1560 static struct file_operations proc_tid_attr_operations = {
1561         .read           = generic_read_dir,
1562         .readdir        = proc_tid_attr_readdir,
1563 };
1564 
/* Lookup in a tgid attr directory: searches the tgid_attr_stuff table. */
1565 static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
1566                                 struct dentry *dentry, struct nameidata *nd)
1567 {
1568         return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1569 }
1570 
/* Lookup in a tid attr directory: searches the tid_attr_stuff table. */
1571 static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1572                                 struct dentry *dentry, struct nameidata *nd)
1573 {
1574         return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1575 }
1576 
/* Inode operations for a tgid attr directory: lookup only. */
1577 static struct inode_operations proc_tgid_attr_inode_operations = {
1578         .lookup         = proc_tgid_attr_lookup,
1579 };
1580 
/* Inode operations for a tid attr directory: lookup only. */
1581 static struct inode_operations proc_tid_attr_inode_operations = {
1582         .lookup         = proc_tid_attr_lookup,
1583 };
1584 #endif
1585 
1586 /*
1587  * /proc/self:
1588  */
/*
 * readlink for the "self" symlink: the link target is the calling task's
 * tgid formatted in decimal, handed to vfs_readlink().
 */
1589 static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1590                               int buflen)
1591 {
1592         char tmp[30];
1593         sprintf(tmp, "%d", current->tgid);
1594         return vfs_readlink(dentry,buffer,buflen,tmp);
1595 }
1596 
/*
 * follow_link for the "self" symlink: resolves to the calling task's tgid
 * formatted in decimal, handed to vfs_follow_link().
 */
1597 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1598 {
1599         char tmp[30];
1600         sprintf(tmp, "%d", current->tgid);
1601         return vfs_follow_link(nd,tmp);
1602 }       
1603 
/* Inode operations for the "self" symlink created in proc_pid_lookup(). */
1604 static struct inode_operations proc_self_inode_operations = {
1605         .readlink       = proc_self_readlink,
1606         .follow_link    = proc_self_follow_link,
1607 };
1608 
1609 /**
1610  * proc_pid_unhash -  Unhash /proc/<pid> entry from the dcache.
1611  * @p: task that should be flushed.
1612  *
1613  * Drops the /proc/<pid> dcache entry from the hash chains.
1614  *
1615  * Dropping /proc/<pid> entries and detach_pid must be synchronous,
1616  * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
1617  * if the pid value is immediately reused. This is enforced by
1618  * - caller must acquire spin_lock(p->proc_lock)
1619  * - must be called before detach_pid()
1620  * - proc_pid_lookup acquires proc_lock, and checks that
1621  *   the target is not dead by looking at the attach count
1622  *   of PIDTYPE_PID.
      *
      * Returns the dentry with an extra reference taken if it was hashed
      * (to be passed to proc_pid_flush()), or NULL if there was no dentry or
      * it was already unhashed.
1623  */
1624 
1625 struct dentry *proc_pid_unhash(struct task_struct *p)
1626 {
1627         struct dentry *proc_dentry;
1628 
1629         proc_dentry = p->proc_dentry;
1630         if (proc_dentry != NULL) {
1631 
1632                 spin_lock(&dcache_lock);
1633                 if (!d_unhashed(proc_dentry)) {
                             /* take a reference before unhashing */
1634                         dget_locked(proc_dentry);
1635                         __d_drop(proc_dentry);
1636                 } else
1637                         proc_dentry = NULL;
1638                 spin_unlock(&dcache_lock);
1639         }
1640         return proc_dentry;
1641 }
1642 
1643 /**
1644  * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
1645  * @proc_dentry: directory to prune.
1646  *
1647  * Shrink the /proc directory that was used by the just killed thread.
      * A NULL @proc_dentry is accepted and ignored.  Otherwise the children
      * are pruned with shrink_dcache_parent() and one reference on the
      * dentry is dropped.
1648  */
1649         
1650 void proc_pid_flush(struct dentry *proc_dentry)
1651 {
1652         might_sleep();
1653         if(proc_dentry != NULL) {
1654                 shrink_dcache_parent(proc_dentry);
1655                 dput(proc_dentry);
1656         }
1657 }
1658 
1659 /* SMP-safe */
/*
 * proc_pid_lookup - lookup of "self" or a numeric name in the proc root.
 *
 * "self" gets a symlink inode using proc_self_inode_operations.  Any other
 * name is parsed with name_to_int() and resolved with find_task_by_pid()
 * under tasklist_lock.  For a found task a directory inode of type
 * PROC_TGID_INO is created and hashed, and the dentry is recorded in
 * task->proc_dentry under task->proc_lock.  If the task turns out to be
 * dead at that point, the dentry is unhashed and flushed again.
 *
 * Returns NULL on success, ERR_PTR(-ENOMEM) if the "self" inode cannot be
 * allocated, and ERR_PTR(-ENOENT) in every other failure case.
 */
1660 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1661 {
1662         struct task_struct *task;
1663         struct inode *inode;
1664         struct proc_inode *ei;
1665         unsigned tgid;
1666         int died;
1667 
1668         if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1669                 inode = new_inode(dir->i_sb);
1670                 if (!inode)
1671                         return ERR_PTR(-ENOMEM);
1672                 ei = PROC_I(inode);
1673                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1674                 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1675                 ei->pde = NULL;
1676                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1677                 inode->i_uid = inode->i_gid = 0;
1678                 inode->i_size = 64;
1679                 inode->i_op = &proc_self_inode_operations;
1680                 d_add(dentry, inode);
1681                 return NULL;
1682         }
1683         tgid = name_to_int(dentry);
1684         if (tgid == ~0U)
1685                 goto out;
1686 
             /* pin the task so it can be used after tasklist_lock is dropped */
1687         read_lock(&tasklist_lock);
1688         task = find_task_by_pid(tgid);
1689         if (task)
1690                 get_task_struct(task);
1691         read_unlock(&tasklist_lock);
1692         if (!task)
1693                 goto out;
1694 
1695         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1696 
1697 
1698         if (!inode) {
1699                 put_task_struct(task);
1700                 goto out;
1701         }
1702         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1703         inode->i_op = &proc_tgid_base_inode_operations;
1704         inode->i_fop = &proc_tgid_base_operations;
1705         inode->i_nlink = 3;
1706         inode->i_flags|=S_IMMUTABLE;
1707 
1708         dentry->d_op = &pid_base_dentry_operations;
1709 
1710         died = 0;
1711         d_add(dentry, inode);
1712         spin_lock(&task->proc_lock);
1713         task->proc_dentry = dentry;
             /* re-check under proc_lock: undo the hashing if the task died meanwhile */
1714         if (!pid_alive(task)) {
1715                 dentry = proc_pid_unhash(task);
1716                 died = 1;
1717         }
1718         spin_unlock(&task->proc_lock);
1719 
             /* drop the lookup's own reference; the inode keeps its own */
1720         put_task_struct(task);
1721         if (died) {
1722                 proc_pid_flush(dentry);
1723                 goto out;
1724         }
1725         return NULL;
1726 out:
1727         return ERR_PTR(-ENOENT);
1728 }
1729 
1730 /* SMP-safe */
/*
 * proc_task_lookup - lookup of a numeric name inside a task directory.
 *
 * The name is parsed with name_to_int() and resolved with
 * find_task_by_pid() under tasklist_lock.  The task found must have the
 * same tgid as the task that owns @dir.  On success a directory inode of
 * type PROC_TID_INO is created and hashed.
 *
 * Returns NULL on success and ERR_PTR(-ENOENT) on any failure.
 */
1731 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1732 {
1733         struct task_struct *task;
1734         struct task_struct *leader = proc_task(dir);
1735         struct inode *inode;
1736         unsigned tid;
1737 
1738         tid = name_to_int(dentry);
1739         if (tid == ~0U)
1740                 goto out;
1741 
1742         read_lock(&tasklist_lock);
1743         task = find_task_by_pid(tid);
1744         if (task)
1745                 get_task_struct(task);
1746         read_unlock(&tasklist_lock);
1747         if (!task)
1748                 goto out;
             /* only threads of this directory's thread group are visible here */
1749         if (leader->tgid != task->tgid)
1750                 goto out_drop_task;
1751 
1752         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
1753 
1754 
1755         if (!inode)
1756                 goto out_drop_task;
1757         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1758         inode->i_op = &proc_tid_base_inode_operations;
1759         inode->i_fop = &proc_tid_base_operations;
1760         inode->i_nlink = 3;
1761         inode->i_flags|=S_IMMUTABLE;
1762 
1763         dentry->d_op = &pid_base_dentry_operations;
1764 
1765         d_add(dentry, inode);
1766 
1767         put_task_struct(task);
1768         return NULL;
1769 out_drop_task:
1770         put_task_struct(task);
1771 out:
1772         return ERR_PTR(-ENOENT);
1773 }
1774 
/*
 * Size of the scratch buffer the readdir helpers use to format a pid as
 * decimal digits (filled from the end, no terminating NUL).  Ten characters
 * hold any 32-bit value.
 */
#define PROC_NUMBUF 10
/* Maximum number of pids collected per pass under tasklist_lock. */
#define PROC_MAXPIDS 20
1777 
1778 /*
1779  * Get a few tgid's to return for filldir - we need to hold the
1780  * tasklist lock while doing this, and we must release it before
1781  * we actually do the filldir itself, so we use a temp buffer..
1782  */
1783 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
1784 {
1785         struct task_struct *p;
1786         int nr_tgids = 0;
1787 
1788         index--;
1789         read_lock(&tasklist_lock);
1790         p = NULL;
1791         if (version) {
1792                 p = find_task_by_pid(version);
1793                 if (p && !thread_group_leader(p))
1794                         p = NULL;
1795         }
1796 
1797         if (p)
1798                 index = 0;
1799         else
1800                 p = next_task(&init_task);
1801 
1802         for ( ; p != &init_task; p = next_task(p)) {
1803                 int tgid = p->pid;
1804                 if (!pid_alive(p))
1805                         continue;
1806                 if (--index >= 0)
1807                         continue;
1808                 tgids[nr_tgids] = tgid;
1809                 nr_tgids++;
1810                 if (nr_tgids >= PROC_MAXPIDS)
1811                         break;
1812         }
1813         read_unlock(&tasklist_lock);
1814         return nr_tgids;
1815 }
1816 
1817 /*
1818  * Get a few tid's to return for filldir - we need to hold the
1819  * tasklist lock while doing this, and we must release it before
1820  * we actually do the filldir itself, so we use a temp buffer..
1821  */
1822 static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
1823 {
1824         struct task_struct *leader_task = proc_task(dir);
1825         struct task_struct *task = leader_task;
1826         int nr_tids = 0;
1827 
1828         index -= 2;
1829         read_lock(&tasklist_lock);
1830         /*
1831          * The starting point task (leader_task) might be an already
1832          * unlinked task, which cannot be used to access the task-list
1833          * via next_thread().
1834          */
1835         if (pid_alive(task)) do {
1836                 int tid = task->pid;
1837 
1838                 if (--index >= 0)
1839                         continue;
1840                 tids[nr_tids] = tid;
1841                 nr_tids++;
1842                 if (nr_tids >= PROC_MAXPIDS)
1843                         break;
1844         } while ((task = next_thread(task)) != leader_task);
1845         read_unlock(&tasklist_lock);
1846         return nr_tids;
1847 }
1848 
1849 /* for the /proc/ directory itself, after non-process stuff has been done */
1850 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1851 {
1852         unsigned int tgid_array[PROC_MAXPIDS];
1853         char buf[PROC_NUMBUF];
1854         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1855         unsigned int nr_tgids, i;
1856         int next_tgid;
1857 
1858         if (!nr) {
1859                 ino_t ino = fake_ino(0,PROC_TGID_INO);
1860                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1861                         return 0;
1862                 filp->f_pos++;
1863                 nr++;
1864         }
1865 
1866         /* f_version caches the tgid value that the last readdir call couldn't
1867          * return. lseek aka telldir automagically resets f_version to 0.
1868          */
1869         next_tgid = filp->f_version;
1870         filp->f_version = 0;
1871         for (;;) {
1872                 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
1873                 if (!nr_tgids) {
1874                         /* no more entries ! */
1875                         break;
1876                 }
1877                 next_tgid = 0;
1878 
1879                 /* do not use the last found pid, reserve it for next_tgid */
1880                 if (nr_tgids == PROC_MAXPIDS) {
1881                         nr_tgids--;
1882                         next_tgid = tgid_array[nr_tgids];
1883                 }
1884 
1885                 for (i=0;i<nr_tgids;i++) {
1886                         int tgid = tgid_array[i];
1887                         ino_t ino = fake_ino(tgid,PROC_TGID_INO);
1888                         unsigned long j = PROC_NUMBUF;
1889 
1890                         do
1891                                 buf[--j] = '' + (tgid % 10);
1892                         while ((tgid /= 10) != 0);
1893 
1894                         if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
1895                                 /* returning this tgid failed, save it as the first
1896                                  * pid for the next readir call */
1897                                 filp->f_version = tgid_array[i];
1898                                 goto out;
1899                         }
1900                         filp->f_pos++;
1901                         nr++;
1902                 }
1903         }
1904 out:
1905         return 0;
1906 }
1907 
1908 /* for the /proc/TGID/task/ directories */
1909 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
1910 {
1911         unsigned int tid_array[PROC_MAXPIDS];
1912         char buf[PROC_NUMBUF];
1913         unsigned int nr_tids, i;
1914         struct dentry *dentry = filp->f_dentry;
1915         struct inode *inode = dentry->d_inode;
1916         int retval = -ENOENT;
1917         ino_t ino;
1918         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
1919 
1920         if (!pid_alive(proc_task(inode)))
1921                 goto out;
1922         retval = 0;
1923 
1924         switch (pos) {
1925         case 0:
1926                 ino = inode->i_ino;
1927                 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
1928                         goto out;
1929                 pos++;
1930                 /* fall through */
1931         case 1:
1932                 ino = parent_ino(dentry);
1933                 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
1934                         goto out;
1935                 pos++;
1936                 /* fall through */
1937         }
1938 
1939         nr_tids = get_tid_list(pos, tid_array, inode);
1940 
1941         for (i = 0; i < nr_tids; i++) {
1942                 unsigned long j = PROC_NUMBUF;
1943                 int tid = tid_array[i];
1944 
1945                 ino = fake_ino(tid,PROC_TID_INO);
1946 
1947                 do
1948                         buf[--j] = '' + (tid % 10);
1949                 while ((tid /= 10) != 0);
1950 
1951                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
1952                         break;
1953                 pos++;
1954         }
1955 out:
1956         filp->f_pos = pos;
1957         return retval;
1958 }
1959 
  This page was automatically generated by the LXR engine.