1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * X86-64 port
8 * Andi Kleen.
9 *
10 * CPU hotplug support - ashok.raj@intel.com
11 */
12
/*
 * This file handles the architecture-dependent parts of process handling.
 */
16
17 #include <linux/stackprotector.h>
18 #include <linux/cpu.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
21 #include <linux/fs.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/elfcore.h>
25 #include <linux/smp.h>
26 #include <linux/slab.h>
27 #include <linux/user.h>
28 #include <linux/interrupt.h>
29 #include <linux/utsname.h>
30 #include <linux/delay.h>
31 #include <linux/module.h>
32 #include <linux/ptrace.h>
33 #include <linux/notifier.h>
34 #include <linux/kprobes.h>
35 #include <linux/kdebug.h>
36 #include <linux/tick.h>
37 #include <linux/prctl.h>
38 #include <linux/uaccess.h>
39 #include <linux/io.h>
40 #include <linux/ftrace.h>
41 #include <linux/dmi.h>
42
43 #include <asm/pgtable.h>
44 #include <asm/system.h>
45 #include <asm/processor.h>
46 #include <asm/i387.h>
47 #include <asm/mmu_context.h>
48 #include <asm/prctl.h>
49 #include <asm/desc.h>
50 #include <asm/proto.h>
51 #include <asm/ia32.h>
52 #include <asm/idle.h>
53 #include <asm/syscalls.h>
54 #include <asm/ds.h>
55
/*
 * Declared here and defined elsewhere (not visible in this file).
 * NOTE(review): presumably the assembly return path taken by a freshly
 * forked task -- confirm against the entry code.
 */
asmlinkage extern void ret_from_fork(void);

/*
 * Per-CPU pointer to the task currently running on that CPU.  Starts out
 * as &init_task and is rewritten by __switch_to() on every context switch.
 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

/*
 * Per-CPU copy of the running task's user stack pointer: start_thread()
 * seeds it, __switch_to() saves it into prev->usersp and reloads it from
 * next->usersp.
 */
DEFINE_PER_CPU(unsigned long, old_rsp);
/* Per-CPU flag: set by enter_idle(), test-and-cleared by __exit_idle(). */
static DEFINE_PER_CPU(unsigned char, is_idle);

/* Not referenced anywhere else in this file. */
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

/* Notifier chain called with IDLE_START / IDLE_END events. */
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
67
/*
 * Add @n to the idle notifier chain.  Registered blocks are called with
 * IDLE_START from enter_idle() and IDLE_END from __exit_idle().
 */
void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);
73
/* Remove @n from the idle notifier chain. */
void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
79
/*
 * Mark this CPU as idle and tell the idle notifier chain about it.
 * cpu_idle() calls this with local interrupts already disabled.
 */
void enter_idle(void)
{
	/* Set the flag first so a later __exit_idle() sees it. */
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
85
86 static void __exit_idle(void)
87 {
88 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
89 return;
90 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
91 }
92
93 /* Called from interrupts to signify idle end */
94 void exit_idle(void)
95 {
96 /* idle loop has pid 0 */
97 if (current->pid)
98 return;
99 __exit_idle();
100 }
101
#ifndef CONFIG_SMP
/*
 * Stub used when CONFIG_SMP is not set.  cpu_idle() calls play_dead()
 * only when cpu_is_offline() reports the current CPU as offline; this
 * build treats reaching it as a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif
108
109 /*
110 * The idle thread. There's no useful work to be
111 * done, so just try to conserve power and have a
112 * low exit latency (ie sit in a loop waiting for
113 * somebody to say that they'd like to reschedule)
114 */
115 void cpu_idle(void)
116 {
117 current_thread_info()->status |= TS_POLLING;
118
119 /*
120 * If we're the non-boot CPU, nothing set the stack canary up
121 * for us. CPU0 already has it initialized but no harm in
122 * doing it again. This is a good place for updating it, as
123 * we wont ever return from this function (so the invalid
124 * canaries already on the stack wont ever trigger).
125 */
126 boot_init_stack_canary();
127
128 /* endless idle loop with no priority at all */
129 while (1) {
130 tick_nohz_stop_sched_tick(1);
131 while (!need_resched()) {
132
133 rmb();
134
135 if (cpu_is_offline(smp_processor_id()))
136 play_dead();
137 /*
138 * Idle routines should keep interrupts disabled
139 * from here on, until they go to idle.
140 * Otherwise, idle callbacks can misfire.
141 */
142 local_irq_disable();
143 enter_idle();
144 /* Don't trace irqs off for idle */
145 stop_critical_timings();
146 pm_idle();
147 start_critical_timings();
148 /* In many cases the interrupt that ended idle
149 has already called exit_idle. But some idle
150 loops can be woken up without interrupt. */
151 __exit_idle();
152 }
153
154 tick_nohz_restart_sched_tick();
155 preempt_enable_no_resched();
156 schedule();
157 preempt_disable();
158 }
159 }
160
161 /* Prints also some state that isn't saved in the pt_regs */
162 void __show_regs(struct pt_regs *regs, int all)
163 {
164 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
165 unsigned long d0, d1, d2, d3, d6, d7;
166 unsigned int fsindex, gsindex;
167 unsigned int ds, cs, es;
168 const char *board;
169
170 printk("\n");
171 print_modules();
172 board = dmi_get_system_info(DMI_PRODUCT_NAME);
173 if (!board)
174 board = "";
175 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
176 current->pid, current->comm, print_tainted(),
177 init_utsname()->release,
178 (int)strcspn(init_utsname()->version, " "),
179 init_utsname()->version, board);
180 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
181 printk_address(regs->ip, 1);
182 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
183 regs->sp, regs->flags);
184 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
185 regs->ax, regs->bx, regs->cx);
186 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
187 regs->dx, regs->si, regs->di);
188 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
189 regs->bp, regs->r8, regs->r9);
190 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
191 regs->r10, regs->r11, regs->r12);
192 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
193 regs->r13, regs->r14, regs->r15);
194
195 asm("movl %%ds,%0" : "=r" (ds));
196 asm("movl %%cs,%0" : "=r" (cs));
197 asm("movl %%es,%0" : "=r" (es));
198 asm("movl %%fs,%0" : "=r" (fsindex));
199 asm("movl %%gs,%0" : "=r" (gsindex));
200
201 rdmsrl(MSR_FS_BASE, fs);
202 rdmsrl(MSR_GS_BASE, gs);
203 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
204
205 if (!all)
206 return;
207
208 cr0 = read_cr0();
209 cr2 = read_cr2();
210 cr3 = read_cr3();
211 cr4 = read_cr4();
212
213 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
214 fs, fsindex, gs, gsindex, shadowgs);
215 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
216 es, cr0);
217 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
218 cr4);
219
220 get_debugreg(d0, 0);
221 get_debugreg(d1, 1);
222 get_debugreg(d2, 2);
223 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
224 get_debugreg(d3, 3);
225 get_debugreg(d6, 6);
226 get_debugreg(d7, 7);
227 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
228 }
229
/*
 * Print the current CPU number, the full register dump for @regs
 * (__show_regs() with all == 1) and a stack trace.  The trace is started
 * at the address just past @regs, with regs->bp passed as the frame
 * pointer.
 */
void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
236
237 void release_thread(struct task_struct *dead_task)
238 {
239 if (dead_task->mm) {
240 if (dead_task->mm->context.size) {
241 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
242 dead_task->comm,
243 dead_task->mm->context.ldt,
244 dead_task->mm->context.size);
245 BUG();
246 }
247 }
248 }
249
250 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
251 {
252 struct user_desc ud = {
253 .base_addr = addr,
254 .limit = 0xfffff,
255 .seg_32bit = 1,
256 .limit_in_pages = 1,
257 .useable = 1,
258 };
259 struct desc_struct *desc = t->thread.tls_array;
260 desc += tls;
261 fill_ldt(desc, &ud);
262 }
263
/* Return the base address stored in slot @tls of @t's TLS descriptor array. */
static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}
268
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * NOTE(review): unlazy_fpu() presumably saves any live FPU state into
 * @tsk so the copy sees up-to-date contents -- confirm against asm/i387.h.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
277
278 int copy_thread(unsigned long clone_flags, unsigned long sp,
279 unsigned long unused,
280 struct task_struct *p, struct pt_regs *regs)
281 {
282 int err;
283 struct pt_regs *childregs;
284 struct task_struct *me = current;
285
286 childregs = ((struct pt_regs *)
287 (THREAD_SIZE + task_stack_page(p))) - 1;
288 *childregs = *regs;
289
290 childregs->ax = 0;
291 childregs->sp = sp;
292 if (sp == ~0UL)
293 childregs->sp = (unsigned long)childregs;
294
295 p->thread.sp = (unsigned long) childregs;
296 p->thread.sp0 = (unsigned long) (childregs+1);
297 p->thread.usersp = me->thread.usersp;
298
299 set_tsk_thread_flag(p, TIF_FORK);
300
301 p->thread.fs = me->thread.fs;
302 p->thread.gs = me->thread.gs;
303
304 savesegment(gs, p->thread.gsindex);
305 savesegment(fs, p->thread.fsindex);
306 savesegment(es, p->thread.es);
307 savesegment(ds, p->thread.ds);
308
309 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
310 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
311 if (!p->thread.io_bitmap_ptr) {
312 p->thread.io_bitmap_max = 0;
313 return -ENOMEM;
314 }
315 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
316 IO_BITMAP_BYTES);
317 set_tsk_thread_flag(p, TIF_IO_BITMAP);
318 }
319
320 /*
321 * Set a new TLS for the child thread?
322 */
323 if (clone_flags & CLONE_SETTLS) {
324 #ifdef CONFIG_IA32_EMULATION
325 if (test_thread_flag(TIF_IA32))
326 err = do_set_thread_area(p, -1,
327 (struct user_desc __user *)childregs->si, 0);
328 else
329 #endif
330 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
331 if (err)
332 goto out;
333 }
334
335 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
336 p->thread.ds_ctx = NULL;
337
338 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
339 p->thread.debugctlmsr = 0;
340
341 err = 0;
342 out:
343 if (err && p->thread.io_bitmap_ptr) {
344 kfree(p->thread.io_bitmap_ptr);
345 p->thread.io_bitmap_max = 0;
346 }
347 return err;
348 }
349
350 void
351 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
352 {
353 loadsegment(fs, 0);
354 loadsegment(es, 0);
355 loadsegment(ds, 0);
356 load_gs_index(0);
357 regs->ip = new_ip;
358 regs->sp = new_sp;
359 percpu_write(old_rsp, new_sp);
360 regs->cs = __USER_CS;
361 regs->ss = __USER_DS;
362 regs->flags = 0x200;
363 set_fs(USER_DS);
364 /*
365 * Free the old FP and other extended state
366 */
367 free_thread_xstate(current);
368 }
369 EXPORT_SYMBOL_GPL(start_thread);
370
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * @prev_p: task being switched away from.
 * @next_p: task being switched to.
 *
 * Returns @prev_p.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here; set the probe on schedule instead.
 * The function graph tracer is not supported either (hence
 * __notrace_funcgraph).
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Hand the next task's sp0 to this CPU's TSS.
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 *
	 * The reload is skipped when both the old and the new selector
	 * are zero.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	/* Remember the selector that was live when prev was switched out. */
	prev->fsindex = fsindex;

	/* Same procedure for GS; the 64bit base goes to MSR_KERNEL_GS_BASE. */
	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the per-CPU state: saved user stack pointer, current
	 * task pointer and kernel stack pointer.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}
494
495 /*
496 * sys_execve() executes a new program.
497 */
498 asmlinkage
499 long sys_execve(char __user *name, char __user * __user *argv,
500 char __user * __user *envp, struct pt_regs *regs)
501 {
502 long error;
503 char *filename;
504
505 filename = getname(name);
506 error = PTR_ERR(filename);
507 if (IS_ERR(filename))
508 return error;
509 error = do_execve(filename, argv, envp, regs);
510 putname(filename);
511 return error;
512 }
513
/*
 * Switch the current thread to the 64-bit personality: clear TIF_IA32
 * and drop READ_IMPLIES_EXEC from current->personality.
 */
void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/*
	 * TBD: this overwrites the user's setup; there should be two bits.
	 * But 64bit processes have always behaved this way, so it's not
	 * too bad.  The main problem is just that 32bit children are
	 * affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}
527
528 asmlinkage long
529 sys_clone(unsigned long clone_flags, unsigned long newsp,
530 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
531 {
532 if (!newsp)
533 newsp = regs->sp;
534 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
535 }
536
/*
 * Switch the current thread to the 32-bit (IA32) personality: set
 * TIF_IA32 and flag the thread status with TS_COMPAT.
 */
void set_personality_ia32(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_IA32);

	/* Prepare the first "return" to user space */
	current_thread_info()->status |= TS_COMPAT;
}
547
548 unsigned long get_wchan(struct task_struct *p)
549 {
550 unsigned long stack;
551 u64 fp, ip;
552 int count = 0;
553
554 if (!p || p == current || p->state == TASK_RUNNING)
555 return 0;
556 stack = (unsigned long)task_stack_page(p);
557 if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
558 return 0;
559 fp = *(u64 *)(p->thread.sp);
560 do {
561 if (fp < (unsigned long)stack ||
562 fp >= (unsigned long)stack+THREAD_SIZE)
563 return 0;
564 ip = *(u64 *)(fp+8);
565 if (!in_sched_functions(ip))
566 return ip;
567 fp = *(u64 *)fp;
568 } while (count++ < 16);
569 return 0;
570 }
571
572 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
573 {
574 int ret = 0;
575 int doit = task == current;
576 int cpu;
577
578 switch (code) {
579 case ARCH_SET_GS:
580 if (addr >= TASK_SIZE_OF(task))
581 return -EPERM;
582 cpu = get_cpu();
583 /* handle small bases via the GDT because that's faster to
584 switch. */
585 if (addr <= 0xffffffff) {
586 set_32bit_tls(task, GS_TLS, addr);
587 if (doit) {
588 load_TLS(&task->thread, cpu);
589 load_gs_index(GS_TLS_SEL);
590 }
591 task->thread.gsindex = GS_TLS_SEL;
592 task->thread.gs = 0;
593 } else {
594 task->thread.gsindex = 0;
595 task->thread.gs = addr;
596 if (doit) {
597 load_gs_index(0);
598 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
599 }
600 }
601 put_cpu();
602 break;
603 case ARCH_SET_FS:
604 /* Not strictly needed for fs, but do it for symmetry
605 with gs */
606 if (addr >= TASK_SIZE_OF(task))
607 return -EPERM;
608 cpu = get_cpu();
609 /* handle small bases via the GDT because that's faster to
610 switch. */
611 if (addr <= 0xffffffff) {
612 set_32bit_tls(task, FS_TLS, addr);
613 if (doit) {
614 load_TLS(&task->thread, cpu);
615 loadsegment(fs, FS_TLS_SEL);
616 }
617 task->thread.fsindex = FS_TLS_SEL;
618 task->thread.fs = 0;
619 } else {
620 task->thread.fsindex = 0;
621 task->thread.fs = addr;
622 if (doit) {
623 /* set the selector to 0 to not confuse
624 __switch_to */
625 loadsegment(fs, 0);
626 ret = checking_wrmsrl(MSR_FS_BASE, addr);
627 }
628 }
629 put_cpu();
630 break;
631 case ARCH_GET_FS: {
632 unsigned long base;
633 if (task->thread.fsindex == FS_TLS_SEL)
634 base = read_32bit_tls(task, FS_TLS);
635 else if (doit)
636 rdmsrl(MSR_FS_BASE, base);
637 else
638 base = task->thread.fs;
639 ret = put_user(base, (unsigned long __user *)addr);
640 break;
641 }
642 case ARCH_GET_GS: {
643 unsigned long base;
644 unsigned gsindex;
645 if (task->thread.gsindex == GS_TLS_SEL)
646 base = read_32bit_tls(task, GS_TLS);
647 else if (doit) {
648 savesegment(gs, gsindex);
649 if (gsindex)
650 rdmsrl(MSR_KERNEL_GS_BASE, base);
651 else
652 base = task->thread.gs;
653 } else
654 base = task->thread.gs;
655 ret = put_user(base, (unsigned long __user *)addr);
656 break;
657 }
658
659 default:
660 ret = -EINVAL;
661 break;
662 }
663
664 return ret;
665 }
666
/* arch_prctl() system call: apply do_arch_prctl() to the calling task. */
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
671
672
/* This page was automatically generated by the LXR engine. */