1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * X86-64 port
8 * Andi Kleen.
9 *
10 * CPU hotplug support - ashok.raj@intel.com
11 */
12
13 /*
 * This file handles the architecture-dependent parts of process handling.
15 */
16
17 #include <stdarg.h>
18
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/utsname.h>
31 #include <linux/delay.h>
32 #include <linux/module.h>
33 #include <linux/ptrace.h>
34 #include <linux/random.h>
35 #include <linux/notifier.h>
36 #include <linux/kprobes.h>
37 #include <linux/kdebug.h>
38 #include <linux/tick.h>
39
40 #include <asm/uaccess.h>
41 #include <asm/pgtable.h>
42 #include <asm/system.h>
43 #include <asm/io.h>
44 #include <asm/processor.h>
45 #include <asm/i387.h>
46 #include <asm/mmu_context.h>
47 #include <asm/pda.h>
48 #include <asm/prctl.h>
49 #include <asm/desc.h>
50 #include <asm/proto.h>
51 #include <asm/ia32.h>
52 #include <asm/idle.h>
53
54 asmlinkage extern void ret_from_fork(void);
55
56 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
57
58 unsigned long boot_option_idle_override = 0;
59 EXPORT_SYMBOL(boot_option_idle_override);
60
61 /*
62 * Powermanagement idle function, if any..
63 */
64 void (*pm_idle)(void);
65 EXPORT_SYMBOL(pm_idle);
66
67 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
68
69 void idle_notifier_register(struct notifier_block *n)
70 {
71 atomic_notifier_chain_register(&idle_notifier, n);
72 }
73
74 void enter_idle(void)
75 {
76 write_pda(isidle, 1);
77 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
78 }
79
80 static void __exit_idle(void)
81 {
82 if (test_and_clear_bit_pda(0, isidle) == 0)
83 return;
84 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
85 }
86
87 /* Called from interrupts to signify idle end */
88 void exit_idle(void)
89 {
90 /* idle loop has pid 0 */
91 if (current->pid)
92 return;
93 __exit_idle();
94 }
95
96 /*
97 * We use this if we don't have any better
98 * idle routine..
99 */
100 void default_idle(void)
101 {
102 current_thread_info()->status &= ~TS_POLLING;
103 /*
104 * TS_POLLING-cleared state must be visible before we
105 * test NEED_RESCHED:
106 */
107 smp_mb();
108 local_irq_disable();
109 if (!need_resched() && !need_resched_delayed()) {
110 ktime_t t0, t1;
111 u64 t0n, t1n;
112
113 t0 = ktime_get();
114 t0n = ktime_to_ns(t0);
115 safe_halt(); /* enables interrupts racelessly */
116 local_irq_disable();
117 t1 = ktime_get();
118 t1n = ktime_to_ns(t1);
119 sched_clock_idle_wakeup_event(t1n - t0n);
120 }
121 local_irq_enable();
122 current_thread_info()->status |= TS_POLLING;
123 }
124
/*
 * Polling idle routine: re-enable interrupts and relax the CPU once.
 *
 * On SMP, polling the ->need_resched flag is slightly faster than waiting
 * for the cross-CPU IPI, but it consumes much more power.  Use with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}
135
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * Physical CPU hotplug: park an offlined CPU.  Leaves the idle task,
 * writes back and invalidates caches, publishes CPU_DEAD in cpu_state and
 * then halts forever with interrupts disabled.  Never returns.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Acknowledge the offline request */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	for (;;)
		halt();
}
#else
/* Without CPU hotplug, reaching this point is a bug. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
159
160 /*
161 * The idle thread. There's no useful work to be
162 * done, so just try to conserve power and have a
163 * low exit latency (ie sit in a loop waiting for
164 * somebody to say that they'd like to reschedule)
165 */
166 void cpu_idle(void)
167 {
168 current_thread_info()->status |= TS_POLLING;
169 /* endless idle loop with no priority at all */
170 while (1) {
171 tick_nohz_stop_sched_tick();
172 while (!need_resched() && !need_resched_delayed()) {
173 void (*idle)(void);
174
175 rmb();
176 idle = pm_idle;
177 if (!idle)
178 idle = default_idle;
179 if (cpu_is_offline(smp_processor_id()))
180 play_dead();
181 /*
182 * Idle routines should keep interrupts disabled
183 * from here on, until they go to idle.
184 * Otherwise, idle callbacks can misfire.
185 */
186 local_irq_disable();
187 enter_idle();
188 /* Don't trace irqs off for idle */
189 stop_critical_timings();
190 idle();
191 start_critical_timings();
192 /* In many cases the interrupt that ended idle
193 has already called exit_idle. But some idle
194 loops can be woken up without interrupt. */
195 __exit_idle();
196 }
197
198 tick_nohz_restart_sched_tick();
199 local_irq_disable();
200 __preempt_enable_no_resched();
201 __schedule();
202 preempt_disable();
203 local_irq_enable();
204 }
205 }
206
207 static void do_nothing(void *unused)
208 {
209 }
210
211 /*
212 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
213 * pm_idle and update to new pm_idle value. Required while changing pm_idle
214 * handler on SMP systems.
215 *
216 * Caller must have changed pm_idle to the new value before the call. Old
217 * pm_idle value will not be used by any CPU after the return of this function.
218 */
219 void cpu_idle_wait(void)
220 {
221 smp_mb();
222 /* kick all the CPUs so that they exit out of pm_idle */
223 smp_call_function(do_nothing, NULL, 0, 1);
224 }
225 EXPORT_SYMBOL_GPL(cpu_idle_wait);
226
227 /*
228 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
229 * which can obviate IPI to trigger checking of need_resched.
230 * We execute MONITOR against need_resched and enter optimized wait state
231 * through MWAIT. Whenever someone changes need_resched, we would be woken
232 * up from MWAIT (without an IPI).
233 *
234 * New with Core Duo processors, MWAIT can take some hints based on CPU
235 * capability.
236 */
237 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
238 {
239 if (!need_resched() && !need_resched_delayed()) {
240 __monitor((void *)¤t_thread_info()->flags, 0, 0);
241 smp_mb();
242 if (!need_resched() && !need_resched_delayed())
243 __mwait(ax, cx);
244 }
245 }
246
247 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
248 static void mwait_idle(void)
249 {
250 if (!need_resched() && !need_resched_delayed()) {
251 __monitor((void *)¤t_thread_info()->flags, 0, 0);
252 smp_mb();
253 if (!need_resched() && !need_resched_delayed())
254 __sti_mwait(0, 0);
255 else
256 local_irq_enable();
257 } else {
258 local_irq_enable();
259 }
260 }
261
262 /*
263 * mwait selection logic:
264 *
265 * It depends on the CPU. For AMD CPUs that support MWAIT this is
266 * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
267 * then depend on a clock divisor and current Pstate of the core. If
268 * all cores of a processor are in halt state (C1) the processor can
269 * enter the C1E (C1 enhanced) state. If mwait is used this will never
270 * happen.
271 *
272 * idle=mwait overrides this decision and forces the usage of mwait.
273 */
274 static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
275 {
276 if (force_mwait)
277 return 1;
278
279 if (c->x86_vendor == X86_VENDOR_AMD) {
280 switch(c->x86) {
281 case 0x10:
282 case 0x11:
283 return 0;
284 }
285 }
286 return 1;
287 }
288
289 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
290 {
291 static int selected;
292
293 if (selected)
294 return;
295 #ifdef CONFIG_X86_SMP
296 if (pm_idle == poll_idle && smp_num_siblings > 1) {
297 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
298 " performance may degrade.\n");
299 }
300 #endif
301 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
302 /*
303 * Skip, if setup has overridden idle.
304 * One CPU supports mwait => All CPUs supports mwait
305 */
306 if (!pm_idle) {
307 printk(KERN_INFO "using mwait in idle threads.\n");
308 pm_idle = mwait_idle;
309 }
310 }
311 selected = 1;
312 }
313
314 static int __init idle_setup(char *str)
315 {
316 if (!strcmp(str, "poll")) {
317 printk("using polling idle threads.\n");
318 pm_idle = poll_idle;
319 } else if (!strcmp(str, "mwait"))
320 force_mwait = 1;
321 else
322 return -1;
323
324 boot_option_idle_override = 1;
325 return 0;
326 }
327 early_param("idle", idle_setup);
328
329 /* Prints also some state that isn't saved in the pt_regs */
330 void __show_regs(struct pt_regs * regs)
331 {
332 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
333 unsigned long d0, d1, d2, d3, d6, d7;
334 unsigned int fsindex, gsindex;
335 unsigned int ds, cs, es;
336
337 printk("\n");
338 print_modules();
339 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
340 current->pid, current->comm, print_tainted(),
341 init_utsname()->release,
342 (int)strcspn(init_utsname()->version, " "),
343 init_utsname()->version);
344 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
345 printk_address(regs->ip, 1);
346 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
347 regs->flags);
348 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
349 regs->ax, regs->bx, regs->cx);
350 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
351 regs->dx, regs->si, regs->di);
352 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
353 regs->bp, regs->r8, regs->r9);
354 printk("R10: %016lx R11: %016lx R12: %016lx\n",
355 regs->r10, regs->r11, regs->r12);
356 printk("R13: %016lx R14: %016lx R15: %016lx\n",
357 regs->r13, regs->r14, regs->r15);
358
359 asm("movl %%ds,%0" : "=r" (ds));
360 asm("movl %%cs,%0" : "=r" (cs));
361 asm("movl %%es,%0" : "=r" (es));
362 asm("movl %%fs,%0" : "=r" (fsindex));
363 asm("movl %%gs,%0" : "=r" (gsindex));
364
365 rdmsrl(MSR_FS_BASE, fs);
366 rdmsrl(MSR_GS_BASE, gs);
367 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
368
369 cr0 = read_cr0();
370 cr2 = read_cr2();
371 cr3 = read_cr3();
372 cr4 = read_cr4();
373
374 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
375 fs,fsindex,gs,gsindex,shadowgs);
376 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
377 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
378
379 get_debugreg(d0, 0);
380 get_debugreg(d1, 1);
381 get_debugreg(d2, 2);
382 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
383 get_debugreg(d3, 3);
384 get_debugreg(d6, 6);
385 get_debugreg(d7, 7);
386 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
387 }
388
389 void show_regs(struct pt_regs *regs)
390 {
391 printk("CPU %d:", smp_processor_id());
392 __show_regs(regs);
393 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
394 }
395
396 /*
397 * Free current thread data structures etc..
398 */
399 void exit_thread(void)
400 {
401 struct task_struct *me = current;
402 struct thread_struct *t = &me->thread;
403
404 if (me->thread.io_bitmap_ptr) {
405 struct tss_struct *tss;
406
407 kfree(t->io_bitmap_ptr);
408 t->io_bitmap_ptr = NULL;
409 clear_thread_flag(TIF_IO_BITMAP);
410 /*
411 * Careful, clear this in the TSS too:
412 */
413 tss = &per_cpu(init_tss, get_cpu());
414 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
415 t->io_bitmap_max = 0;
416 put_cpu();
417 }
418 }
419
420 void flush_thread(void)
421 {
422 struct task_struct *tsk = current;
423
424 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
425 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
426 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
427 clear_tsk_thread_flag(tsk, TIF_IA32);
428 } else {
429 set_tsk_thread_flag(tsk, TIF_IA32);
430 current_thread_info()->status |= TS_COMPAT;
431 }
432 }
433 clear_tsk_thread_flag(tsk, TIF_DEBUG);
434
435 tsk->thread.debugreg0 = 0;
436 tsk->thread.debugreg1 = 0;
437 tsk->thread.debugreg2 = 0;
438 tsk->thread.debugreg3 = 0;
439 tsk->thread.debugreg6 = 0;
440 tsk->thread.debugreg7 = 0;
441 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
442 /*
443 * Forget coprocessor state..
444 */
445 clear_fpu(tsk);
446 clear_used_math();
447 }
448
449 void release_thread(struct task_struct *dead_task)
450 {
451 if (dead_task->mm) {
452 if (dead_task->mm->context.size) {
453 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
454 dead_task->comm,
455 dead_task->mm->context.ldt,
456 dead_task->mm->context.size);
457 BUG();
458 }
459 }
460 }
461
462 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
463 {
464 struct user_desc ud = {
465 .base_addr = addr,
466 .limit = 0xfffff,
467 .seg_32bit = 1,
468 .limit_in_pages = 1,
469 .useable = 1,
470 };
471 struct desc_struct *desc = t->thread.tls_array;
472 desc += tls;
473 fill_ldt(desc, &ud);
474 }
475
476 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
477 {
478 return get_desc_base(&t->thread.tls_array[tls]);
479 }
480
/*
 * Called before a new thread is allocated and the current task is copied
 * into it.  Calls unlazy_fpu() on @tsk first.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
489
490 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
491 unsigned long unused,
492 struct task_struct * p, struct pt_regs * regs)
493 {
494 int err;
495 struct pt_regs * childregs;
496 struct task_struct *me = current;
497
498 childregs = ((struct pt_regs *)
499 (THREAD_SIZE + task_stack_page(p))) - 1;
500 *childregs = *regs;
501
502 childregs->ax = 0;
503 childregs->sp = sp;
504 if (sp == ~0UL)
505 childregs->sp = (unsigned long)childregs;
506
507 p->thread.sp = (unsigned long) childregs;
508 p->thread.sp0 = (unsigned long) (childregs+1);
509 p->thread.usersp = me->thread.usersp;
510
511 set_tsk_thread_flag(p, TIF_FORK);
512
513 p->thread.fs = me->thread.fs;
514 p->thread.gs = me->thread.gs;
515
516 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
517 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
518 asm("mov %%es,%0" : "=m" (p->thread.es));
519 asm("mov %%ds,%0" : "=m" (p->thread.ds));
520
521 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
522 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
523 if (!p->thread.io_bitmap_ptr) {
524 p->thread.io_bitmap_max = 0;
525 return -ENOMEM;
526 }
527 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
528 IO_BITMAP_BYTES);
529 set_tsk_thread_flag(p, TIF_IO_BITMAP);
530 }
531
532 /*
533 * Set a new TLS for the child thread?
534 */
535 if (clone_flags & CLONE_SETTLS) {
536 #ifdef CONFIG_IA32_EMULATION
537 if (test_thread_flag(TIF_IA32))
538 err = do_set_thread_area(p, -1,
539 (struct user_desc __user *)childregs->si, 0);
540 else
541 #endif
542 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
543 if (err)
544 goto out;
545 }
546 err = 0;
547 out:
548 if (err && p->thread.io_bitmap_ptr) {
549 kfree(p->thread.io_bitmap_ptr);
550 p->thread.io_bitmap_max = 0;
551 }
552 return err;
553 }
554
555 /*
556 * This special macro can be used to load a debugging register
557 */
558 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
559
560 static inline void __switch_to_xtra(struct task_struct *prev_p,
561 struct task_struct *next_p,
562 struct tss_struct *tss)
563 {
564 struct thread_struct *prev, *next;
565 unsigned long debugctl;
566
567 prev = &prev_p->thread,
568 next = &next_p->thread;
569
570 debugctl = prev->debugctlmsr;
571 if (next->ds_area_msr != prev->ds_area_msr) {
572 /* we clear debugctl to make sure DS
573 * is not in use when we change it */
574 debugctl = 0;
575 wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
576 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
577 }
578
579 if (next->debugctlmsr != debugctl)
580 wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
581
582 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
583 loaddebug(next, 0);
584 loaddebug(next, 1);
585 loaddebug(next, 2);
586 loaddebug(next, 3);
587 /* no 4 and 5 */
588 loaddebug(next, 6);
589 loaddebug(next, 7);
590 }
591
592 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
593 /*
594 * Copy the relevant range of the IO bitmap.
595 * Normally this is 128 bytes or less:
596 */
597 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
598 max(prev->io_bitmap_max, next->io_bitmap_max));
599 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
600 /*
601 * Clear any possible leftover bits:
602 */
603 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
604 }
605
606 #ifdef X86_BTS
607 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
608 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
609
610 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
611 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
612 #endif
613 }
614
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * Performs the register-level part of a context switch from @prev_p to
 * @next_p on the executing CPU: kernel stack pointer in the TSS, data
 * segment selectors, TLS descriptors, FS/GS selectors and bases, the
 * per-CPU PDA fields, and optionally debug/I-O-bitmap and FPU state.
 * Returns @prev_p.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter>5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 * (NOTE(review): only load_sp0() is called here; the LDT and page
	 * table are not touched in this function.)
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 * The live selector is saved into prev first; a reload is only done
	 * when either the old or the new selector is non-zero.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		/* Remember the selector prev was actually running with. */
		prev->fsindex = fsindex;
	}
	{
		/* Same scheme as FS, but the base goes to MSR_KERNEL_GS_BASE. */
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 * Saves prev's user stack pointer from the PDA, then installs next's
	 * user stack pointer, task pointer and kernel stack top.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 * (only when either task has the relevant work flags set).
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}
731
732 /*
733 * sys_execve() executes a new program.
734 */
735 asmlinkage
736 long sys_execve(char __user *name, char __user * __user *argv,
737 char __user * __user *envp, struct pt_regs *regs)
738 {
739 long error;
740 char * filename;
741
742 filename = getname(name);
743 error = PTR_ERR(filename);
744 if (IS_ERR(filename))
745 return error;
746 error = do_execve(filename, argv, envp, regs);
747 putname(filename);
748 return error;
749 }
750
751 void set_personality_64bit(void)
752 {
753 /* inherit personality from parent */
754
755 /* Make sure to be in 64bit mode */
756 clear_thread_flag(TIF_IA32);
757
758 /* TBD: overwrites user setup. Should have two bits.
759 But 64bit processes have always behaved this way,
760 so it's not too bad. The main problem is just that
761 32bit childs are affected again. */
762 current->personality &= ~READ_IMPLIES_EXEC;
763 }
764
765 asmlinkage long sys_fork(struct pt_regs *regs)
766 {
767 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
768 }
769
770 asmlinkage long
771 sys_clone(unsigned long clone_flags, unsigned long newsp,
772 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
773 {
774 if (!newsp)
775 newsp = regs->sp;
776 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
777 }
778
779 /*
780 * This is trivial, and on the face of it looks like it
781 * could equally well be done in user mode.
782 *
783 * Not so, for quite unobvious reasons - register pressure.
784 * In user mode vfork() cannot have a stack frame, and if
785 * done by calling the "clone()" system call directly, you
786 * do not have enough call-clobbered registers to hold all
787 * the information you need.
788 */
789 asmlinkage long sys_vfork(struct pt_regs *regs)
790 {
791 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
792 NULL, NULL);
793 }
794
795 unsigned long get_wchan(struct task_struct *p)
796 {
797 unsigned long stack;
798 u64 fp,ip;
799 int count = 0;
800
801 if (!p || p == current || p->state==TASK_RUNNING)
802 return 0;
803 stack = (unsigned long)task_stack_page(p);
804 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
805 return 0;
806 fp = *(u64 *)(p->thread.sp);
807 do {
808 if (fp < (unsigned long)stack ||
809 fp > (unsigned long)stack+THREAD_SIZE)
810 return 0;
811 ip = *(u64 *)(fp+8);
812 if (!in_sched_functions(ip))
813 return ip;
814 fp = *(u64 *)fp;
815 } while (count++ < 16);
816 return 0;
817 }
818
/*
 * Get or set the FS/GS base of @task.
 *
 * @task: task to operate on; hardware state is only touched when it is
 *        the current task
 * @code: ARCH_SET_GS, ARCH_SET_FS, ARCH_GET_FS or ARCH_GET_GS
 * @addr: new base for the SET codes; user pointer receiving the base for
 *        the GET codes
 *
 * Returns 0 on success, -EPERM if a new base is at or above
 * TASK_SIZE_OF(task), -EINVAL for an unknown @code, or the result of
 * checking_wrmsrl()/put_user().
 */
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	/* Only touch live registers/MSRs when acting on ourselves. */
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			/* Large base: null selector plus the base in an MSR. */
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			/* Large base: null selector plus the base in an MSR. */
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		/*
		 * Base comes from the TLS descriptor when the TLS selector is
		 * in use, from the MSR for the current task, and from the
		 * saved thread state otherwise.
		 */
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			/*
			 * For the current task the live selector decides:
			 * non-zero reads the MSR, zero uses the saved base.
			 */
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
914
915 long sys_arch_prctl(int code, unsigned long addr)
916 {
917 return do_arch_prctl(current, code, addr);
918 }
919
920 unsigned long arch_align_stack(unsigned long sp)
921 {
922 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
923 sp -= get_random_int() % 8192;
924 return sp & ~0xf;
925 }
926
927 unsigned long arch_randomize_brk(struct mm_struct *mm)
928 {
929 unsigned long range_end = mm->brk + 0x02000000;
930 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
931 }
932
/* This page was automatically generated by the LXR engine. */