Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  *  Copyright (C) 1995  Linus Torvalds
  3  *
  4  *  Pentium III FXSR, SSE support
  5  *      Gareth Hughes <gareth@valinux.com>, May 2000
  6  *
  7  *  X86-64 port
  8  *      Andi Kleen.
  9  *
 10  *      CPU hotplug support - ashok.raj@intel.com
 11  */
 12 
 13 /*
 14  * This file handles the architecture-dependent parts of process handling..
 15  */
 16 
 17 #include <stdarg.h>
 18 
 19 #include <linux/cpu.h>
 20 #include <linux/errno.h>
 21 #include <linux/sched.h>
 22 #include <linux/fs.h>
 23 #include <linux/kernel.h>
 24 #include <linux/mm.h>
 25 #include <linux/elfcore.h>
 26 #include <linux/smp.h>
 27 #include <linux/slab.h>
 28 #include <linux/user.h>
 29 #include <linux/interrupt.h>
 30 #include <linux/utsname.h>
 31 #include <linux/delay.h>
 32 #include <linux/module.h>
 33 #include <linux/ptrace.h>
 34 #include <linux/random.h>
 35 #include <linux/notifier.h>
 36 #include <linux/kprobes.h>
 37 #include <linux/kdebug.h>
 38 #include <linux/tick.h>
 39 
 40 #include <asm/uaccess.h>
 41 #include <asm/pgtable.h>
 42 #include <asm/system.h>
 43 #include <asm/io.h>
 44 #include <asm/processor.h>
 45 #include <asm/i387.h>
 46 #include <asm/mmu_context.h>
 47 #include <asm/pda.h>
 48 #include <asm/prctl.h>
 49 #include <asm/desc.h>
 50 #include <asm/proto.h>
 51 #include <asm/ia32.h>
 52 #include <asm/idle.h>
 53 
 54 asmlinkage extern void ret_from_fork(void);
 55 
 56 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 57 
 58 unsigned long boot_option_idle_override = 0;
 59 EXPORT_SYMBOL(boot_option_idle_override);
 60 
 61 /*
 62  * Powermanagement idle function, if any..
 63  */
 64 void (*pm_idle)(void);
 65 EXPORT_SYMBOL(pm_idle);
 66 
 67 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 68 
 69 void idle_notifier_register(struct notifier_block *n)
 70 {
 71         atomic_notifier_chain_register(&idle_notifier, n);
 72 }
 73 
 74 void enter_idle(void)
 75 {
 76         write_pda(isidle, 1);
 77         atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 78 }
 79 
 80 static void __exit_idle(void)
 81 {
 82         if (test_and_clear_bit_pda(0, isidle) == 0)
 83                 return;
 84         atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 85 }
 86 
 87 /* Called from interrupts to signify idle end */
 88 void exit_idle(void)
 89 {
 90         /* idle loop has pid 0 */
 91         if (current->pid)
 92                 return;
 93         __exit_idle();
 94 }
 95 
 96 /*
 97  * We use this if we don't have any better
 98  * idle routine..
 99  */
100 void default_idle(void)
101 {
102         current_thread_info()->status &= ~TS_POLLING;
103         /*
104          * TS_POLLING-cleared state must be visible before we
105          * test NEED_RESCHED:
106          */
107         smp_mb();
108         local_irq_disable();
109         if (!need_resched() && !need_resched_delayed()) {
110                 ktime_t t0, t1;
111                 u64 t0n, t1n;
112 
113                 t0 = ktime_get();
114                 t0n = ktime_to_ns(t0);
115                 safe_halt();    /* enables interrupts racelessly */
116                 local_irq_disable();
117                 t1 = ktime_get();
118                 t1n = ktime_to_ns(t1);
119                 sched_clock_idle_wakeup_event(t1n - t0n);
120         }
121         local_irq_enable();
122         current_thread_info()->status |= TS_POLLING;
123 }
124 
/*
 * Polling idle routine (selected by "idle=poll").
 *
 * On SMP, polling ->need_resched is slightly faster than waiting for the
 * cross-CPU IPI, at the price of much higher power consumption.  Use with
 * caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}
135 
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * Physical CPU hotplug: park an offlined CPU.  Never returns.
 */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Acknowledge the offline request by publishing CPU_DEAD. */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        for (;;)
                halt();
}
#else
/* Without CPU hotplug support, reaching this is a bug. */
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
159 
160 /*
161  * The idle thread. There's no useful work to be
162  * done, so just try to conserve power and have a
163  * low exit latency (ie sit in a loop waiting for
164  * somebody to say that they'd like to reschedule)
165  */
166 void cpu_idle(void)
167 {
168         current_thread_info()->status |= TS_POLLING;
169         /* endless idle loop with no priority at all */
170         while (1) {
171                 tick_nohz_stop_sched_tick();
172                 while (!need_resched() && !need_resched_delayed()) {
173                         void (*idle)(void);
174 
175                         rmb();
176                         idle = pm_idle;
177                         if (!idle)
178                                 idle = default_idle;
179                         if (cpu_is_offline(smp_processor_id()))
180                                 play_dead();
181                         /*
182                          * Idle routines should keep interrupts disabled
183                          * from here on, until they go to idle.
184                          * Otherwise, idle callbacks can misfire.
185                          */
186                         local_irq_disable();
187                         enter_idle();
188                         /* Don't trace irqs off for idle */
189                         stop_critical_timings();
190                         idle();
191                         start_critical_timings();
192                         /* In many cases the interrupt that ended idle
193                            has already called exit_idle. But some idle
194                            loops can be woken up without interrupt. */
195                         __exit_idle();
196                 }
197 
198                 tick_nohz_restart_sched_tick();
199                 local_irq_disable();
200                 __preempt_enable_no_resched();
201                 __schedule();
202                 preempt_disable();
203                 local_irq_enable();
204         }
205 }
206 
/* Empty cross-CPU callback; cpu_idle_wait() uses it purely to kick CPUs. */
static void do_nothing(void *unused)
{
}
210 
211 /*
212  * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
213  * pm_idle and update to new pm_idle value. Required while changing pm_idle
214  * handler on SMP systems.
215  *
216  * Caller must have changed pm_idle to the new value before the call. Old
217  * pm_idle value will not be used by any CPU after the return of this function.
218  */
219 void cpu_idle_wait(void)
220 {
221         smp_mb();
222         /* kick all the CPUs so that they exit out of pm_idle */
223         smp_call_function(do_nothing, NULL, 0, 1);
224 }
225 EXPORT_SYMBOL_GPL(cpu_idle_wait);
226 
227 /*
228  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
229  * which can obviate IPI to trigger checking of need_resched.
230  * We execute MONITOR against need_resched and enter optimized wait state
231  * through MWAIT. Whenever someone changes need_resched, we would be woken
232  * up from MWAIT (without an IPI).
233  *
234  * New with Core Duo processors, MWAIT can take some hints based on CPU
235  * capability.
236  */
237 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
238 {
239         if (!need_resched() && !need_resched_delayed()) {
240                 __monitor((void *)&current_thread_info()->flags, 0, 0);
241                 smp_mb();
242                 if (!need_resched() && !need_resched_delayed())
243                         __mwait(ax, cx);
244         }
245 }
246 
247 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
248 static void mwait_idle(void)
249 {
250         if (!need_resched() && !need_resched_delayed()) {
251                 __monitor((void *)&current_thread_info()->flags, 0, 0);
252                 smp_mb();
253                 if (!need_resched() && !need_resched_delayed())
254                         __sti_mwait(0, 0);
255                 else
256                         local_irq_enable();
257         } else {
258                 local_irq_enable();
259         }
260 }
261 
262 /*
263  * mwait selection logic:
264  *
265  * It depends on the CPU. For AMD CPUs that support MWAIT this is
266  * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
267  * then depend on a clock divisor and current Pstate of the core. If
268  * all cores of a processor are in halt state (C1) the processor can
269  * enter the C1E (C1 enhanced) state. If mwait is used this will never
270  * happen.
271  *
272  * idle=mwait overrides this decision and forces the usage of mwait.
273  */
274 static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
275 {
276         if (force_mwait)
277                 return 1;
278 
279         if (c->x86_vendor == X86_VENDOR_AMD) {
280                 switch(c->x86) {
281                 case 0x10:
282                 case 0x11:
283                         return 0;
284                 }
285         }
286         return 1;
287 }
288 
289 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
290 {
291         static int selected;
292 
293         if (selected)
294                 return;
295 #ifdef CONFIG_X86_SMP
296         if (pm_idle == poll_idle && smp_num_siblings > 1) {
297                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
298                         " performance may degrade.\n");
299         }
300 #endif
301         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
302                 /*
303                  * Skip, if setup has overridden idle.
304                  * One CPU supports mwait => All CPUs supports mwait
305                  */
306                 if (!pm_idle) {
307                         printk(KERN_INFO "using mwait in idle threads.\n");
308                         pm_idle = mwait_idle;
309                 }
310         }
311         selected = 1;
312 }
313 
314 static int __init idle_setup(char *str)
315 {
316         if (!strcmp(str, "poll")) {
317                 printk("using polling idle threads.\n");
318                 pm_idle = poll_idle;
319         } else if (!strcmp(str, "mwait"))
320                 force_mwait = 1;
321         else
322                 return -1;
323 
324         boot_option_idle_override = 1;
325         return 0;
326 }
327 early_param("idle", idle_setup);
328 
329 /* Prints also some state that isn't saved in the pt_regs */
330 void __show_regs(struct pt_regs * regs)
331 {
332         unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
333         unsigned long d0, d1, d2, d3, d6, d7;
334         unsigned int fsindex, gsindex;
335         unsigned int ds, cs, es;
336 
337         printk("\n");
338         print_modules();
339         printk("Pid: %d, comm: %.20s %s %s %.*s\n",
340                 current->pid, current->comm, print_tainted(),
341                 init_utsname()->release,
342                 (int)strcspn(init_utsname()->version, " "),
343                 init_utsname()->version);
344         printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
345         printk_address(regs->ip, 1);
346         printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
347                 regs->flags);
348         printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
349                regs->ax, regs->bx, regs->cx);
350         printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
351                regs->dx, regs->si, regs->di);
352         printk("RBP: %016lx R08: %016lx R09: %016lx\n",
353                regs->bp, regs->r8, regs->r9);
354         printk("R10: %016lx R11: %016lx R12: %016lx\n",
355                regs->r10, regs->r11, regs->r12); 
356         printk("R13: %016lx R14: %016lx R15: %016lx\n",
357                regs->r13, regs->r14, regs->r15); 
358 
359         asm("movl %%ds,%0" : "=r" (ds)); 
360         asm("movl %%cs,%0" : "=r" (cs)); 
361         asm("movl %%es,%0" : "=r" (es)); 
362         asm("movl %%fs,%0" : "=r" (fsindex));
363         asm("movl %%gs,%0" : "=r" (gsindex));
364 
365         rdmsrl(MSR_FS_BASE, fs);
366         rdmsrl(MSR_GS_BASE, gs); 
367         rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
368 
369         cr0 = read_cr0();
370         cr2 = read_cr2();
371         cr3 = read_cr3();
372         cr4 = read_cr4();
373 
374         printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
375                fs,fsindex,gs,gsindex,shadowgs); 
376         printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
377         printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
378 
379         get_debugreg(d0, 0);
380         get_debugreg(d1, 1);
381         get_debugreg(d2, 2);
382         printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
383         get_debugreg(d3, 3);
384         get_debugreg(d6, 6);
385         get_debugreg(d7, 7);
386         printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
387 }
388 
389 void show_regs(struct pt_regs *regs)
390 {
391         printk("CPU %d:", smp_processor_id());
392         __show_regs(regs);
393         show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
394 }
395 
396 /*
397  * Free current thread data structures etc..
398  */
399 void exit_thread(void)
400 {
401         struct task_struct *me = current;
402         struct thread_struct *t = &me->thread;
403 
404         if (me->thread.io_bitmap_ptr) {
405                 struct tss_struct *tss;
406 
407                 kfree(t->io_bitmap_ptr);
408                 t->io_bitmap_ptr = NULL;
409                 clear_thread_flag(TIF_IO_BITMAP);
410                 /*
411                  * Careful, clear this in the TSS too:
412                  */
413                 tss = &per_cpu(init_tss, get_cpu());
414                 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
415                 t->io_bitmap_max = 0;
416                 put_cpu();
417         }
418 }
419 
420 void flush_thread(void)
421 {
422         struct task_struct *tsk = current;
423 
424         if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
425                 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
426                 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
427                         clear_tsk_thread_flag(tsk, TIF_IA32);
428                 } else {
429                         set_tsk_thread_flag(tsk, TIF_IA32);
430                         current_thread_info()->status |= TS_COMPAT;
431                 }
432         }
433         clear_tsk_thread_flag(tsk, TIF_DEBUG);
434 
435         tsk->thread.debugreg0 = 0;
436         tsk->thread.debugreg1 = 0;
437         tsk->thread.debugreg2 = 0;
438         tsk->thread.debugreg3 = 0;
439         tsk->thread.debugreg6 = 0;
440         tsk->thread.debugreg7 = 0;
441         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
442         /*
443          * Forget coprocessor state..
444          */
445         clear_fpu(tsk);
446         clear_used_math();
447 }
448 
449 void release_thread(struct task_struct *dead_task)
450 {
451         if (dead_task->mm) {
452                 if (dead_task->mm->context.size) {
453                         printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
454                                         dead_task->comm,
455                                         dead_task->mm->context.ldt,
456                                         dead_task->mm->context.size);
457                         BUG();
458                 }
459         }
460 }
461 
462 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
463 {
464         struct user_desc ud = {
465                 .base_addr = addr,
466                 .limit = 0xfffff,
467                 .seg_32bit = 1,
468                 .limit_in_pages = 1,
469                 .useable = 1,
470         };
471         struct desc_struct *desc = t->thread.tls_array;
472         desc += tls;
473         fill_ldt(desc, &ud);
474 }
475 
476 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
477 {
478         return get_desc_base(&t->thread.tls_array[tls]);
479 }
480 
/*
 * Called before a new thread is allocated and the current task is copied
 * into it; calls unlazy_fpu() on @tsk first.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}
489 
490 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
491                 unsigned long unused,
492         struct task_struct * p, struct pt_regs * regs)
493 {
494         int err;
495         struct pt_regs * childregs;
496         struct task_struct *me = current;
497 
498         childregs = ((struct pt_regs *)
499                         (THREAD_SIZE + task_stack_page(p))) - 1;
500         *childregs = *regs;
501 
502         childregs->ax = 0;
503         childregs->sp = sp;
504         if (sp == ~0UL)
505                 childregs->sp = (unsigned long)childregs;
506 
507         p->thread.sp = (unsigned long) childregs;
508         p->thread.sp0 = (unsigned long) (childregs+1);
509         p->thread.usersp = me->thread.usersp;
510 
511         set_tsk_thread_flag(p, TIF_FORK);
512 
513         p->thread.fs = me->thread.fs;
514         p->thread.gs = me->thread.gs;
515 
516         asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
517         asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
518         asm("mov %%es,%0" : "=m" (p->thread.es));
519         asm("mov %%ds,%0" : "=m" (p->thread.ds));
520 
521         if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
522                 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
523                 if (!p->thread.io_bitmap_ptr) {
524                         p->thread.io_bitmap_max = 0;
525                         return -ENOMEM;
526                 }
527                 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
528                                 IO_BITMAP_BYTES);
529                 set_tsk_thread_flag(p, TIF_IO_BITMAP);
530         }
531 
532         /*
533          * Set a new TLS for the child thread?
534          */
535         if (clone_flags & CLONE_SETTLS) {
536 #ifdef CONFIG_IA32_EMULATION
537                 if (test_thread_flag(TIF_IA32))
538                         err = do_set_thread_area(p, -1,
539                                 (struct user_desc __user *)childregs->si, 0);
540                 else                    
541 #endif   
542                         err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
543                 if (err) 
544                         goto out;
545         }
546         err = 0;
547 out:
548         if (err && p->thread.io_bitmap_ptr) {
549                 kfree(p->thread.io_bitmap_ptr);
550                 p->thread.io_bitmap_max = 0;
551         }
552         return err;
553 }
554 
/*
 * Load debug register number @r from the matching debugreg<r> field of
 * @thread (a pointer to a thread_struct).  @r must be a literal register
 * number because it is token-pasted onto the field name.  The pointer
 * argument is parenthesized so that any pointer expression can be passed.
 */
#define loaddebug(thread, r) set_debugreg((thread)->debugreg ## r, r)
559 
560 static inline void __switch_to_xtra(struct task_struct *prev_p,
561                                     struct task_struct *next_p,
562                                     struct tss_struct *tss)
563 {
564         struct thread_struct *prev, *next;
565         unsigned long debugctl;
566 
567         prev = &prev_p->thread,
568         next = &next_p->thread;
569 
570         debugctl = prev->debugctlmsr;
571         if (next->ds_area_msr != prev->ds_area_msr) {
572                 /* we clear debugctl to make sure DS
573                  * is not in use when we change it */
574                 debugctl = 0;
575                 wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
576                 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
577         }
578 
579         if (next->debugctlmsr != debugctl)
580                 wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
581 
582         if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
583                 loaddebug(next, 0);
584                 loaddebug(next, 1);
585                 loaddebug(next, 2);
586                 loaddebug(next, 3);
587                 /* no 4 and 5 */
588                 loaddebug(next, 6);
589                 loaddebug(next, 7);
590         }
591 
592         if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
593                 /*
594                  * Copy the relevant range of the IO bitmap.
595                  * Normally this is 128 bytes or less:
596                  */
597                 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
598                        max(prev->io_bitmap_max, next->io_bitmap_max));
599         } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
600                 /*
601                  * Clear any possible leftover bits:
602                  */
603                 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
604         }
605 
606 #ifdef X86_BTS
607         if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
608                 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
609 
610         if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
611                 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
612 #endif
613 }
614 
615 /*
616  *      switch_to(x,y) should switch tasks from x to y.
617  *
618  * This could still be optimized:
619  * - fold all the options into a flag word and test it with a single test.
620  * - could test fs/gs bitsliced
621  *
622  * Kprobes not supported here. Set the probe on schedule instead.
623  */
624 struct task_struct *
625 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
626 {
627         struct thread_struct *prev = &prev_p->thread,
628                                  *next = &next_p->thread;
629         int cpu = smp_processor_id();
630         struct tss_struct *tss = &per_cpu(init_tss, cpu);
631 
632         /* we're going to use this soon, after a few expensive things */
633         if (next_p->fpu_counter>5)
634                 prefetch(&next->i387.fxsave);
635 
636         /*
637          * Reload esp0, LDT and the page table pointer:
638          */
639         load_sp0(tss, next);
640 
641         /* 
642          * Switch DS and ES.
643          * This won't pick up thread selector changes, but I guess that is ok.
644          */
645         asm volatile("mov %%es,%0" : "=m" (prev->es));
646         if (unlikely(next->es | prev->es))
647                 loadsegment(es, next->es); 
648         
649         asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
650         if (unlikely(next->ds | prev->ds))
651                 loadsegment(ds, next->ds);
652 
653         load_TLS(next, cpu);
654 
655         /* 
656          * Switch FS and GS.
657          */
658         { 
659                 unsigned fsindex;
660                 asm volatile("movl %%fs,%0" : "=r" (fsindex)); 
661                 /* segment register != 0 always requires a reload. 
662                    also reload when it has changed. 
663                    when prev process used 64bit base always reload
664                    to avoid an information leak. */
665                 if (unlikely(fsindex | next->fsindex | prev->fs)) {
666                         loadsegment(fs, next->fsindex);
667                         /* check if the user used a selector != 0
668                          * if yes clear 64bit base, since overloaded base
669                          * is always mapped to the Null selector
670                          */
671                         if (fsindex)
672                         prev->fs = 0;                           
673                 }
674                 /* when next process has a 64bit base use it */
675                 if (next->fs) 
676                         wrmsrl(MSR_FS_BASE, next->fs); 
677                 prev->fsindex = fsindex;
678         }
679         { 
680                 unsigned gsindex;
681                 asm volatile("movl %%gs,%0" : "=r" (gsindex)); 
682                 if (unlikely(gsindex | next->gsindex | prev->gs)) {
683                         load_gs_index(next->gsindex);
684                         if (gsindex)
685                         prev->gs = 0;                           
686                 }
687                 if (next->gs)
688                         wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
689                 prev->gsindex = gsindex;
690         }
691 
692         /* Must be after DS reload */
693         unlazy_fpu(prev_p);
694 
695         /* 
696          * Switch the PDA and FPU contexts.
697          */
698         prev->usersp = read_pda(oldrsp);
699         write_pda(oldrsp, next->usersp);
700         write_pda(pcurrent, next_p); 
701 
702         write_pda(kernelstack,
703         (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
704 #ifdef CONFIG_CC_STACKPROTECTOR
705         write_pda(stack_canary, next_p->stack_canary);
706         /*
707          * Build time only check to make sure the stack_canary is at
708          * offset 40 in the pda; this is a gcc ABI requirement
709          */
710         BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
711 #endif
712 
713         /*
714          * Now maybe reload the debug registers and handle I/O bitmaps
715          */
716         if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
717                      task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
718                 __switch_to_xtra(prev_p, next_p, tss);
719 
720         /* If the task has used fpu the last 5 timeslices, just do a full
721          * restore of the math state immediately to avoid the trap; the
722          * chances of needing FPU soon are obviously high now
723          *
724          * tsk_used_math() checks prevent calling math_state_restore(),
725          * which can sleep in the case of !tsk_used_math()
726          */
727         if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
728                 math_state_restore();
729         return prev_p;
730 }
731 
732 /*
733  * sys_execve() executes a new program.
734  */
735 asmlinkage
736 long sys_execve(char __user *name, char __user * __user *argv,
737                 char __user * __user *envp, struct pt_regs *regs)
738 {
739         long error;
740         char * filename;
741 
742         filename = getname(name);
743         error = PTR_ERR(filename);
744         if (IS_ERR(filename))
745                 return error;
746         error = do_execve(filename, argv, envp, regs);
747         putname(filename);
748         return error;
749 }
750 
751 void set_personality_64bit(void)
752 {
753         /* inherit personality from parent */
754 
755         /* Make sure to be in 64bit mode */
756         clear_thread_flag(TIF_IA32);
757 
758         /* TBD: overwrites user setup. Should have two bits.
759            But 64bit processes have always behaved this way,
760            so it's not too bad. The main problem is just that
761            32bit childs are affected again. */
762         current->personality &= ~READ_IMPLIES_EXEC;
763 }
764 
765 asmlinkage long sys_fork(struct pt_regs *regs)
766 {
767         return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
768 }
769 
770 asmlinkage long
771 sys_clone(unsigned long clone_flags, unsigned long newsp,
772           void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
773 {
774         if (!newsp)
775                 newsp = regs->sp;
776         return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
777 }
778 
779 /*
780  * This is trivial, and on the face of it looks like it
781  * could equally well be done in user mode.
782  *
783  * Not so, for quite unobvious reasons - register pressure.
784  * In user mode vfork() cannot have a stack frame, and if
785  * done by calling the "clone()" system call directly, you
786  * do not have enough call-clobbered registers to hold all
787  * the information you need.
788  */
789 asmlinkage long sys_vfork(struct pt_regs *regs)
790 {
791         return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
792                     NULL, NULL);
793 }
794 
795 unsigned long get_wchan(struct task_struct *p)
796 {
797         unsigned long stack;
798         u64 fp,ip;
799         int count = 0;
800 
801         if (!p || p == current || p->state==TASK_RUNNING)
802                 return 0; 
803         stack = (unsigned long)task_stack_page(p);
804         if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
805                 return 0;
806         fp = *(u64 *)(p->thread.sp);
807         do { 
808                 if (fp < (unsigned long)stack ||
809                     fp > (unsigned long)stack+THREAD_SIZE)
810                         return 0; 
811                 ip = *(u64 *)(fp+8);
812                 if (!in_sched_functions(ip))
813                         return ip;
814                 fp = *(u64 *)fp; 
815         } while (count++ < 16); 
816         return 0;
817 }
818 
819 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
820 { 
821         int ret = 0; 
822         int doit = task == current;
823         int cpu;
824 
825         switch (code) { 
826         case ARCH_SET_GS:
827                 if (addr >= TASK_SIZE_OF(task))
828                         return -EPERM; 
829                 cpu = get_cpu();
830                 /* handle small bases via the GDT because that's faster to 
831                    switch. */
832                 if (addr <= 0xffffffff) {  
833                         set_32bit_tls(task, GS_TLS, addr); 
834                         if (doit) { 
835                                 load_TLS(&task->thread, cpu);
836                                 load_gs_index(GS_TLS_SEL); 
837                         }
838                         task->thread.gsindex = GS_TLS_SEL; 
839                         task->thread.gs = 0;
840                 } else { 
841                         task->thread.gsindex = 0;
842                         task->thread.gs = addr;
843                         if (doit) {
844                                 load_gs_index(0);
845                                 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
846                         } 
847                 }
848                 put_cpu();
849                 break;
850         case ARCH_SET_FS:
851                 /* Not strictly needed for fs, but do it for symmetry
852                    with gs */
853                 if (addr >= TASK_SIZE_OF(task))
854                         return -EPERM;
855                 cpu = get_cpu();
856                 /* handle small bases via the GDT because that's faster to
857                    switch. */
858                 if (addr <= 0xffffffff) {
859                         set_32bit_tls(task, FS_TLS, addr);
860                         if (doit) {
861                                 load_TLS(&task->thread, cpu);
862                                 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
863                         }
864                         task->thread.fsindex = FS_TLS_SEL;
865                         task->thread.fs = 0;
866                 } else {
867                         task->thread.fsindex = 0;
868                         task->thread.fs = addr;
869                         if (doit) {
870                                 /* set the selector to 0 to not confuse
871                                    __switch_to */
872                                 asm volatile("movl %0,%%fs" :: "r" (0));
873                                 ret = checking_wrmsrl(MSR_FS_BASE, addr);
874                         }
875                 }
876                 put_cpu();
877                 break;
878         case ARCH_GET_FS: {
879                 unsigned long base;
880                 if (task->thread.fsindex == FS_TLS_SEL)
881                         base = read_32bit_tls(task, FS_TLS);
882                 else if (doit)
883                         rdmsrl(MSR_FS_BASE, base);
884                 else
885                         base = task->thread.fs;
886                 ret = put_user(base, (unsigned long __user *)addr);
887                 break;
888         }
889         case ARCH_GET_GS: {
890                 unsigned long base;
891                 unsigned gsindex;
892                 if (task->thread.gsindex == GS_TLS_SEL)
893                         base = read_32bit_tls(task, GS_TLS);
894                 else if (doit) {
895                         asm("movl %%gs,%0" : "=r" (gsindex));
896                         if (gsindex)
897                                 rdmsrl(MSR_KERNEL_GS_BASE, base);
898                         else
899                                 base = task->thread.gs;
900                 }
901                 else
902                         base = task->thread.gs;
903                 ret = put_user(base, (unsigned long __user *)addr);
904                 break;
905         }
906 
907         default:
908                 ret = -EINVAL;
909                 break;
910         }
911 
912         return ret;
913 }
914 
915 long sys_arch_prctl(int code, unsigned long addr)
916 {
917         return do_arch_prctl(current, code, addr);
918 }
919 
920 unsigned long arch_align_stack(unsigned long sp)
921 {
922         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
923                 sp -= get_random_int() % 8192;
924         return sp & ~0xf;
925 }
926 
927 unsigned long arch_randomize_brk(struct mm_struct *mm)
928 {
929         unsigned long range_end = mm->brk + 0x02000000;
930         return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
931 }
932 
  This page was automatically generated by the LXR engine.