1 /*
2 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
3 *
4 * started by Ingo Molnar and Thomas Gleixner.
5 *
6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9 * Copyright (C) 2006 Esben Nielsen
10 *
11 * Adaptive Spinlocks:
12 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13 * and Peter Morreale,
14 * Adaptive Spinlocks simplification:
15 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
16 *
17 * See Documentation/rt-mutex-design.txt for details.
18 */
19 #include <linux/spinlock.h>
20 #include <linux/module.h>
21 #include <linux/sched.h>
22 #include <linux/timer.h>
23 #include <linux/hardirq.h>
24
25 #include "rtmutex_common.h"
26
27 /*
28 * lock->owner state tracking:
29 *
30 * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1
31 * are used to keep track of the "owner is pending" and "lock has
32 * waiters" state.
33 *
34 * owner bit1 bit0
35 * NULL 0 0 lock is free (fast acquire possible)
36 * NULL 0 1 invalid state
37 * NULL 1 0 Transitional State*
38 * NULL 1 1 invalid state
39 * taskpointer 0 0 lock is held (fast release possible)
40 * taskpointer 0 1 task is pending owner
41 * taskpointer 1 0 lock is held and has waiters
42 * taskpointer 1 1 task is pending owner and lock has more waiters
43 *
44 * Pending ownership is assigned to the top (highest priority)
45 * waiter of the lock, when the lock is released. The thread is woken
46 * up and can now take the lock. Until the lock is taken (bit 0
47 * cleared) a competing higher priority thread can steal the lock
48 * which puts the woken up thread back on the waiters list.
49 *
50 * The fast atomic compare exchange based acquire and release is only
51 * possible when bit 0 and 1 of lock->owner are 0.
52 *
53 * (*) There's a small time where the owner can be NULL and the
54 * "lock has waiters" bit is set. This can happen when grabbing the lock.
55 * To prevent a cmpxchg of the owner releasing the lock, we need to set this
56 * bit before looking at the lock, hence the reason this is a transitional
57 * state.
58 */
59
60 static void
61 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
62 unsigned long mask)
63 {
64 unsigned long val = (unsigned long)owner | mask;
65
66 if (rt_mutex_has_waiters(lock))
67 val |= RT_MUTEX_HAS_WAITERS;
68
69 lock->owner = (struct task_struct *)val;
70 }
71
72 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
73 {
74 lock->owner = (struct task_struct *)
75 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
76 }
77
/*
 * Bring the "has waiters" bit back in sync: if nobody is queued on the
 * lock any more, clear the bit in lock->owner.
 */
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (rt_mutex_has_waiters(lock))
		return;

	clear_rt_mutex_waiters(lock);
}
83
84 /*
85 * We can speed up the acquire/release, if the architecture
86 * supports cmpxchg and if there's no debugging state to be set up
87 */
88 #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
89 # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
90 # define rt_rwlock_cmpxchg(rwm,c,n) (cmpxchg(&(rwm)->owner, c, n) == c)
91 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
92 {
93 unsigned long owner, *p = (unsigned long *) &lock->owner;
94
95 do {
96 owner = *p;
97 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
98 }
99 #ifdef CONFIG_PREEMPT_RT
100 static inline void mark_rt_rwlock_check(struct rw_mutex *rwm)
101 {
102 unsigned long owner, *p = (unsigned long *) &rwm->owner;
103
104 do {
105 owner = *p;
106 } while (cmpxchg(p, owner, owner | RT_RWLOCK_CHECK) != owner);
107 }
108 #endif /* CONFIG_PREEMPT_RT */
109 #else
110 # define rt_mutex_cmpxchg(l,c,n) (0)
111 # define rt_rwlock_cmpxchg(l,c,n) ({ (void)c; (void)n; 0; })
112 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
113 {
114 lock->owner = (struct task_struct *)
115 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
116 }
117 #ifdef CONFIG_PREEMPT_RT
118 static inline void mark_rt_rwlock_check(struct rw_mutex *rwm)
119 {
120 rwm->owner = (struct task_struct *)
121 ((unsigned long)rwm->owner | RT_RWLOCK_CHECK);
122 }
123 #endif /* CONFIG_PREEMPT_RT */
124 #endif
125
/*
 * Tell whether @task is the RT_RW_READER marker value rather than a
 * real task. Without CONFIG_PREEMPT_RT this is always false.
 */
#ifndef CONFIG_PREEMPT_RT
static inline int task_is_reader(struct task_struct *task)
{
	return 0;
}
#else
static inline int task_is_reader(struct task_struct *task)
{
	return task == RT_RW_READER;
}
#endif
134
135 int pi_initialized;
136
137 /*
138 * we initialize the wait_list runtime. (Could be done build-time and/or
139 * boot-time.)
140 */
141 static inline void init_lists(struct rt_mutex *lock)
142 {
143 if (unlikely(!lock->wait_list.prio_list.prev)) {
144 plist_head_init(&lock->wait_list, &lock->wait_lock);
145 #ifdef CONFIG_DEBUG_RT_MUTEXES
146 pi_initialized++;
147 #endif
148 }
149 }
150
151 static int rt_mutex_get_readers_prio(struct task_struct *task, int prio);
152
153 /*
154 * Calculate task priority from the waiter list priority
155 *
156 * Return task->normal_prio when the waiter list is empty or when
157 * the waiter is not allowed to do priority boosting
158 */
159 int rt_mutex_getprio(struct task_struct *task)
160 {
161 int prio = min(task->normal_prio, get_rcu_prio(task));
162
163 prio = rt_mutex_get_readers_prio(task, prio);
164
165 if (likely(!task_has_pi_waiters(task)))
166 return prio;
167
168 return min(task_top_pi_waiter(task)->pi_list_entry.prio, prio);
169 }
170
171 /*
172 * Adjust the priority of a task, after its pi_waiters got modified.
173 *
174 * This can be both boosting and unboosting. task->pi_lock must be held.
175 */
176 static void __rt_mutex_adjust_prio(struct task_struct *task)
177 {
178 int prio = rt_mutex_getprio(task);
179
180 if (task->prio != prio)
181 rt_mutex_setprio(task, prio);
182 }
183
184 /*
185 * Adjust task priority (undo boosting). Called from the exit path of
186 * rt_mutex_slowunlock() and rt_mutex_slowlock().
187 *
188 * (Note: We do this outside of the protection of lock->wait_lock to
189 * allow the lock to be taken while or before we readjust the priority
190 * of task. We do not use the spin_xx_mutex() variants here as we are
191 * outside of the debug path.)
192 */
193 static void rt_mutex_adjust_prio(struct task_struct *task)
194 {
195 unsigned long flags;
196
197 spin_lock_irqsave(&task->pi_lock, flags);
198 __rt_mutex_adjust_prio(task);
199 spin_unlock_irqrestore(&task->pi_lock, flags);
200 }
201
/*
 * Max number of times we'll walk the boosting chain. Compared against the
 * per-call depth counter in rt_mutex_adjust_prio_chain().
 */
int max_lock_depth = 1024;

/*
 * Forward declaration. Called in this file when the owner of a lock is a
 * reader (see rt_mutex_adjust_prio_chain() and task_blocks_on_rt_mutex()).
 */
static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
				   struct rt_mutex_waiter *orig_waiter,
				   struct task_struct *top_task,
				   struct rt_mutex *lock,
				   int recursion_depth);
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 * Returns 0 or -EDEADLK.
 *
 * @task:		task to start the walk from; the caller must hold a
 *			reference on it, which is dropped here
 * @deadlock_detect:	when set, -EDEADLK is returned if a deadlock is
 *			found or the depth limit is hit; otherwise 0
 * @orig_lock:		lock the walk started from (may be NULL)
 * @orig_waiter:	waiter the walk started from (may be NULL)
 * @top_task:		task on whose behalf the chain is walked
 * @recursion_depth:	only handed on to rt_mutex_adjust_readers()
 *
 * When the next owner in the chain is a reader, the result of
 * rt_mutex_adjust_readers() is returned instead.
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task,
				      int recursion_depth)
{
	struct rt_mutex *lock;
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	/* depth counts the passes through the "again" label below */
	int detect_deadlock, ret = 0, depth = 0;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		/* No locks are held here, only the task reference. */
		put_task_struct(task);

		return deadlock_detect ? -EDEADLK : 0;
	}
 retry:
	/*
	 * Task can not go away as we did a get_task() before !
	 */
	spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter || !waiter->task)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock
	 * and made us the pending owner:
	 */
	if (orig_waiter && !orig_waiter->task)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter && (!task_has_pi_waiters(task) ||
			   top_waiter != task_top_pi_waiter(task)))
		goto out_unlock_pi;

	/*
	 * When deadlock detection is off then we check, if further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->list_entry.prio == task->prio)
		goto out_unlock_pi;

	lock = waiter->lock;
	/*
	 * Only trylock the wait_lock while holding pi_lock; on failure
	 * drop pi_lock and start this step over.
	 */
	if (!spin_trylock(&lock->wait_lock)) {
		spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/* Deadlock detection */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		spin_unlock(&lock->wait_lock);
		ret = deadlock_detect ? -EDEADLK : 0;
		goto out_unlock_pi;
	}

	/* Remember the top waiter before the requeue changes the order */
	top_waiter = rt_mutex_top_waiter(lock);

	/* Requeue the waiter */
	plist_del(&waiter->list_entry, &lock->wait_list);
	waiter->list_entry.prio = task->prio;
	plist_add(&waiter->list_entry, &lock->wait_list);

	/*
	 * Release the task. Plain spin_unlock(): the saved irq flags are
	 * restored later, together with lock->wait_lock.
	 */
	spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/* Grab the next task */
	task = rt_mutex_owner(lock);

	/*
	 * Readers are special. We may need to boost more than one owner.
	 */
	if (task_is_reader(task)) {
		ret = rt_mutex_adjust_readers(orig_lock, orig_waiter,
					      top_task, lock,
					      recursion_depth);
		spin_unlock_irqrestore(&lock->wait_lock, flags);
		/*
		 * The reference on the previous task was already dropped
		 * above and none was taken on the reader marker, so skip
		 * the put_task_struct() at out_put_task.
		 */
		goto out;
	}

	get_task_struct(task);
	spin_lock(&task->pi_lock);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* Boost the owner */
		plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);

	} else if (top_waiter == waiter) {
		/* Deboost the owner */
		plist_del(&waiter->pi_list_entry, &task->pi_waiters);
		waiter = rt_mutex_top_waiter(lock);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);
	}

	spin_unlock(&task->pi_lock);

	top_waiter = rt_mutex_top_waiter(lock);
	spin_unlock_irqrestore(&lock->wait_lock, flags);

	/*
	 * Without deadlock detection the walk ends here unless the
	 * waiter we handled is the top waiter of the lock.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);
 out:
	return ret;
}
371
372 /*
373 * Optimization: check if we can steal the lock from the
374 * assigned pending owner [which might not have taken the
375 * lock yet]:
376 */
377 static inline int try_to_steal_lock(struct rt_mutex *lock, int mode)
378 {
379 struct task_struct *pendowner = rt_mutex_owner(lock);
380 struct rt_mutex_waiter *next;
381
382 if (!rt_mutex_owner_pending(lock))
383 return 0;
384
385 if (pendowner == current)
386 return 1;
387
388 WARN_ON(task_is_reader(rt_mutex_owner(lock)));
389
390 spin_lock(&pendowner->pi_lock);
391 if (!lock_is_stealable(pendowner, mode)) {
392 spin_unlock(&pendowner->pi_lock);
393 return 0;
394 }
395
396 /*
397 * Check if a waiter is enqueued on the pending owners
398 * pi_waiters list. Remove it and readjust pending owners
399 * priority.
400 */
401 if (likely(!rt_mutex_has_waiters(lock))) {
402 spin_unlock(&pendowner->pi_lock);
403 return 1;
404 }
405
406 /* No chain handling, pending owner is not blocked on anything: */
407 next = rt_mutex_top_waiter(lock);
408 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
409 __rt_mutex_adjust_prio(pendowner);
410 spin_unlock(&pendowner->pi_lock);
411
412 /*
413 * We are going to steal the lock and a waiter was
414 * enqueued on the pending owners pi_waiters queue. So
415 * we have to enqueue this waiter into
416 * current->pi_waiters list. This covers the case,
417 * where current is boosted because it holds another
418 * lock and gets unboosted because the booster is
419 * interrupted, so we would delay a waiter with higher
420 * priority as current->normal_prio.
421 *
422 * Note: in the rare case of a SCHED_OTHER task changing
423 * its priority and thus stealing the lock, next->task
424 * might be current:
425 */
426 if (likely(next->task != current)) {
427 spin_lock(¤t->pi_lock);
428 plist_add(&next->pi_list_entry, ¤t->pi_waiters);
429 __rt_mutex_adjust_prio(current);
430 spin_unlock(¤t->pi_lock);
431 }
432 return 1;
433 }
434
435 /*
436 * Try to take an rt-mutex
437 *
438 * This fails
439 * - when the lock has a real owner
440 * - when a different pending owner exists and has higher priority than current
441 *
442 * Must be called with lock->wait_lock held.
443 */
444 static int do_try_to_take_rt_mutex(struct rt_mutex *lock, int mode)
445 {
446 /*
447 * We have to be careful here if the atomic speedups are
448 * enabled, such that, when
449 * - no other waiter is on the lock
450 * - the lock has been released since we did the cmpxchg
451 * the lock can be released or taken while we are doing the
452 * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
453 *
454 * The atomic acquire/release aware variant of
455 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
456 * the WAITERS bit, the atomic release / acquire can not
457 * happen anymore and lock->wait_lock protects us from the
458 * non-atomic case.
459 *
460 * Note, that this might set lock->owner =
461 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
462 * any more. This is fixed up when we take the ownership.
463 * This is the transitional state explained at the top of this file.
464 */
465 mark_rt_mutex_waiters(lock);
466
467 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, mode))
468 return 0;
469
470 /* We got the lock. */
471 debug_rt_mutex_lock(lock);
472
473 rt_mutex_set_owner(lock, current, 0);
474
475 rt_mutex_deadlock_account_lock(lock, current);
476
477 return 1;
478 }
479
/*
 * Convenience wrapper: try to take @lock via do_try_to_take_rt_mutex()
 * in STEAL_NORMAL mode and return its result.
 */
static inline int try_to_take_rt_mutex(struct rt_mutex *lock)
{
	return do_try_to_take_rt_mutex(lock, STEAL_NORMAL);
}
484
485 /*
486 * Task blocks on lock.
487 *
488 * Prepare waiter and propagate pi chain
489 *
490 * This must be called with lock->wait_lock held.
491 */
492 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
493 struct rt_mutex_waiter *waiter,
494 int detect_deadlock, unsigned long flags)
495 {
496 struct task_struct *owner = rt_mutex_owner(lock);
497 struct rt_mutex_waiter *top_waiter = waiter;
498 int chain_walk = 0, res;
499
500 spin_lock(¤t->pi_lock);
501 __rt_mutex_adjust_prio(current);
502 waiter->task = current;
503 waiter->lock = lock;
504 plist_node_init(&waiter->list_entry, current->prio);
505 plist_node_init(&waiter->pi_list_entry, current->prio);
506
507 /* Get the top priority waiter on the lock */
508 if (rt_mutex_has_waiters(lock))
509 top_waiter = rt_mutex_top_waiter(lock);
510 plist_add(&waiter->list_entry, &lock->wait_list);
511
512 current->pi_blocked_on = waiter;
513
514 spin_unlock(¤t->pi_lock);
515
516 if (waiter == rt_mutex_top_waiter(lock)) {
517 /* readers are handled differently */
518 if (task_is_reader(owner)) {
519 res = rt_mutex_adjust_readers(lock, waiter,
520 current, lock, 0);
521 return res;
522 }
523
524 spin_lock(&owner->pi_lock);
525 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
526 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
527
528 __rt_mutex_adjust_prio(owner);
529 if (owner->pi_blocked_on)
530 chain_walk = 1;
531 spin_unlock(&owner->pi_lock);
532 }
533 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
534 chain_walk = 1;
535
536 if (!chain_walk || task_is_reader(owner))
537 return 0;
538
539 /*
540 * The owner can't disappear while holding a lock,
541 * so the owner struct is protected by wait_lock.
542 * Gets dropped in rt_mutex_adjust_prio_chain()!
543 */
544 get_task_struct(owner);
545
546 spin_unlock_irqrestore(&lock->wait_lock, flags);
547
548 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
549 current, 0);
550
551 spin_lock_irq(&lock->wait_lock);
552
553 return res;
554 }
555
556 /*
557 * Wake up the next waiter on the lock.
558 *
559 * Remove the top waiter from the current tasks waiter list and from
560 * the lock waiter list. Set it as pending owner. Then wake it up.
561 *
562 * Called with lock->wait_lock held.
563 */
564 static void wakeup_next_waiter(struct rt_mutex *lock, int savestate)
565 {
566 struct rt_mutex_waiter *waiter;
567 struct task_struct *pendowner;
568 struct rt_mutex_waiter *next;
569
570 spin_lock(¤t->pi_lock);
571
572 waiter = rt_mutex_top_waiter(lock);
573 plist_del(&waiter->list_entry, &lock->wait_list);
574
575 /*
576 * Remove it from current->pi_waiters. We do not adjust a
577 * possible priority boost right now. We execute wakeup in the
578 * boosted mode and go back to normal after releasing
579 * lock->wait_lock.
580 */
581 plist_del(&waiter->pi_list_entry, ¤t->pi_waiters);
582 pendowner = waiter->task;
583 waiter->task = NULL;
584
585 /*
586 * Do the wakeup before the ownership change to give any spinning
587 * waiter grantees a headstart over the other threads that will
588 * trigger once owner changes.
589 */
590 if (!savestate)
591 wake_up_process(pendowner);
592 else {
593 /*
594 * We can skip the actual (expensive) wakeup if the
595 * waiter is already running, but we have to be careful
596 * of race conditions because they may be about to sleep.
597 *
598 * The waiter-side protocol has the following pattern:
599 * 1: Set state != RUNNING
600 * 2: Conditionally sleep if waiter->task != NULL;
601 *
602 * And the owner-side has the following:
603 * A: Set waiter->task = NULL
604 * B: Conditionally wake if the state != RUNNING
605 *
606 * As long as we ensure 1->2 order, and A->B order, we
607 * will never miss a wakeup.
608 *
609 * Therefore, this barrier ensures that waiter->task = NULL
610 * is visible before we test the pendowner->state. The
611 * corresponding barrier is in the sleep logic.
612 */
613 smp_mb();
614
615 /* If !RUNNING && !RUNNING_MUTEX */
616 if (pendowner->state & ~TASK_RUNNING_MUTEX)
617 wake_up_process_mutex(pendowner);
618 }
619
620 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
621
622 spin_unlock(¤t->pi_lock);
623
624 /*
625 * Clear the pi_blocked_on variable and enqueue a possible
626 * waiter into the pi_waiters list of the pending owner. This
627 * prevents that in case the pending owner gets unboosted a
628 * waiter with higher priority than pending-owner->normal_prio
629 * is blocked on the unboosted (pending) owner.
630 */
631
632 if (rt_mutex_has_waiters(lock))
633 next = rt_mutex_top_waiter(lock);
634 else
635 next = NULL;
636
637 spin_lock(&pendowner->pi_lock);
638
639 WARN_ON(!pendowner->pi_blocked_on);
640 WARN_ON(pendowner->pi_blocked_on != waiter);
641 WARN_ON(pendowner->pi_blocked_on->lock != lock);
642
643 pendowner->pi_blocked_on = NULL;
644
645 if (next)
646 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
647
648 spin_unlock(&pendowner->pi_lock);
649 }
650
651 /*
652 * Remove a waiter from a lock
653 *
654 * Must be called with lock->wait_lock held
655 */
656 static void remove_waiter(struct rt_mutex *lock,
657 struct rt_mutex_waiter *waiter,
658 unsigned long flags)
659 {
660 int first = (waiter == rt_mutex_top_waiter(lock));
661 struct task_struct *owner = rt_mutex_owner(lock);
662 int chain_walk = 0;
663
664 spin_lock(¤t->pi_lock);
665 plist_del(&waiter->list_entry, &lock->wait_list);
666 waiter->task = NULL;
667 current->pi_blocked_on = NULL;
668 spin_unlock(¤t->pi_lock);
669
670 if (first && owner != current && !task_is_reader(owner)) {
671
672 spin_lock(&owner->pi_lock);
673
674 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
675
676 if (rt_mutex_has_waiters(lock)) {
677 struct rt_mutex_waiter *next;
678
679 next = rt_mutex_top_waiter(lock);
680 plist_add(&next->pi_list_entry, &owner->pi_waiters);
681 }
682 __rt_mutex_adjust_prio(owner);
683
684 if (owner->pi_blocked_on)
685 chain_walk = 1;
686
687 spin_unlock(&owner->pi_lock);
688 }
689
690 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
691
692 if (!chain_walk)
693 return;
694
695 /* gets dropped in rt_mutex_adjust_prio_chain()! */
696 get_task_struct(owner);
697
698 spin_unlock_irqrestore(&lock->wait_lock, flags);
699
700 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0);
701
702 spin_lock_irq(&lock->wait_lock);
703 }
704
705 /*
706 * Recheck the pi chain, in case we got a priority setting
707 *
708 * Called from sched_setscheduler
709 */
710 void rt_mutex_adjust_pi(struct task_struct *task)
711 {
712 struct rt_mutex_waiter *waiter;
713 unsigned long flags;
714
715 spin_lock_irqsave(&task->pi_lock, flags);
716
717 waiter = task->pi_blocked_on;
718 if (!waiter || waiter->list_entry.prio == task->prio) {
719 spin_unlock_irqrestore(&task->pi_lock, flags);
720 return;
721 }
722
723 /* gets dropped in rt_mutex_adjust_prio_chain()! */
724 get_task_struct(task);
725 spin_unlock_irqrestore(&task->pi_lock, flags);
726
727 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0);
728 }
729
730 /*
731 * preemptible spin_lock functions:
732 */
733
734 #ifdef CONFIG_PREEMPT_RT
735
736 static inline void
737 rt_spin_lock_fastlock(struct rt_mutex *lock,
738 void (*slowfn)(struct rt_mutex *lock))
739 {
740 /* Temporary HACK! */
741 if (likely(!current->in_printk))
742 might_sleep();
743 else if (in_atomic() || irqs_disabled())
744 /* don't grab locks for printk in atomic */
745 return;
746
747 if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
748 rt_mutex_deadlock_account_lock(lock, current);
749 else
750 slowfn(lock);
751 }
752
753 static inline void
754 rt_spin_lock_fastunlock(struct rt_mutex *lock,
755 void (*slowfn)(struct rt_mutex *lock))
756 {
757 /* Temporary HACK! */
758 if (unlikely(rt_mutex_owner(lock) != current) && current->in_printk)
759 /* don't grab locks for printk in atomic */
760 return;
761
762 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
763 rt_mutex_deadlock_account_unlock(current);
764 else
765 slowfn(lock);
766 }
767
768 static inline void
769 update_current(unsigned long new_state, unsigned long *saved_state)
770 {
771 unsigned long state = xchg(¤t->state, new_state);
772 if (unlikely(state == TASK_RUNNING))
773 *saved_state = TASK_RUNNING;
774 }
775
/*
 * Decide whether the waiter should go to sleep (returns 1) or retry
 * taking the lock (returns 0).
 */
#ifndef CONFIG_SMP
static int adaptive_wait(struct rt_mutex_waiter *waiter,
			 struct task_struct *orig_owner)
{
	return 1;
}
#else
static int adaptive_wait(struct rt_mutex_waiter *waiter,
			 struct task_struct *orig_owner)
{
	/*
	 * Keep spinning only while we are still queued as a waiter
	 * (waiter->task set) and the lock owner has not changed.
	 */
	while (waiter->task && orig_owner == rt_mutex_owner(waiter->lock)) {
		/* Owner went to bed, so should we */
		if (!task_is_current(orig_owner))
			return 1;

		cpu_relax();
	}

	return 0;
}
#endif
804
805 /*
806 * Slow path lock function spin_lock style: this variant is very
807 * careful not to miss any non-lock wakeups.
808 *
809 * The wakeup side uses wake_up_process_mutex, which, combined with
810 * the xchg code of this function is a transparent sleep/wakeup
811 * mechanism nested within any existing sleep/wakeup mechanism. This
812 * enables the seemless use of arbitrary (blocking) spinlocks within
813 * sleep/wakeup event loops.
814 */
815 static void noinline __sched
816 rt_spin_lock_slowlock(struct rt_mutex *lock)
817 {
818 struct rt_mutex_waiter waiter;
819 unsigned long saved_state, state, flags;
820 struct task_struct *orig_owner;
821 int missed = 0;
822
823 debug_rt_mutex_init_waiter(&waiter);
824 waiter.task = NULL;
825 waiter.write_lock = 0;
826
827 spin_lock_irqsave(&lock->wait_lock, flags);
828 init_lists(lock);
829
830 BUG_ON(rt_mutex_owner(lock) == current);
831
832 /*
833 * Here we save whatever state the task was in originally,
834 * we'll restore it at the end of the function and we'll take
835 * any intermediate wakeup into account as well, independently
836 * of the lock sleep/wakeup mechanism. When we get a real
837 * wakeup the task->state is TASK_RUNNING and we change
838 * saved_state accordingly. If we did not get a real wakeup
839 * then we return with the saved state.
840 */
841 saved_state = current->state;
842
843 for (;;) {
844 unsigned long saved_flags;
845 int saved_lock_depth = current->lock_depth;
846
847 /* Try to acquire the lock */
848 if (do_try_to_take_rt_mutex(lock, STEAL_LATERAL)) {
849 /* If we never blocked break out now */
850 if (!missed)
851 goto unlock;
852 break;
853 }
854 missed = 1;
855
856 /*
857 * waiter.task is NULL the first time we come here and
858 * when we have been woken up by the previous owner
859 * but the lock got stolen by an higher prio task.
860 */
861 if (!waiter.task) {
862 task_blocks_on_rt_mutex(lock, &waiter, 0, flags);
863 /* Wakeup during boost ? */
864 if (unlikely(!waiter.task))
865 continue;
866 }
867
868 /*
869 * Prevent schedule() to drop BKL, while waiting for
870 * the lock ! We restore lock_depth when we come back.
871 */
872 saved_flags = current->flags & PF_NOSCHED;
873 current->lock_depth = -1;
874 current->flags &= ~PF_NOSCHED;
875 orig_owner = rt_mutex_owner(lock);
876 get_task_struct(orig_owner);
877 spin_unlock_irqrestore(&lock->wait_lock, flags);
878
879 debug_rt_mutex_print_deadlock(&waiter);
880
881 if (adaptive_wait(&waiter, orig_owner)) {
882 put_task_struct(orig_owner);
883 update_current(TASK_UNINTERRUPTIBLE, &saved_state);
884 /*
885 * The xchg() in update_current() is an implicit
886 * barrier which we rely upon to ensure current->state
887 * is visible before we test waiter.task.
888 */
889 if (waiter.task)
890 schedule_rt_mutex(lock);
891 } else
892 put_task_struct(orig_owner);
893
894 spin_lock_irqsave(&lock->wait_lock, flags);
895 current->flags |= saved_flags;
896 current->lock_depth = saved_lock_depth;
897 }
898
899 state = xchg(¤t->state, saved_state);
900 if (unlikely(state == TASK_RUNNING))
901 current->state = TASK_RUNNING;
902
903 /*
904 * Extremely rare case, if we got woken up by a non-mutex wakeup,
905 * and we managed to steal the lock despite us not being the
906 * highest-prio waiter (due to SCHED_OTHER changing prio), then we
907 * can end up with a non-NULL waiter.task:
908 */
909 if (unlikely(waiter.task))
910 remove_waiter(lock, &waiter, flags);
911 /*
912 * try_to_take_rt_mutex() sets the waiter bit
913 * unconditionally. We might have to fix that up:
914 */
915 fixup_rt_mutex_waiters(lock);
916
917 unlock:
918 spin_unlock_irqrestore(&lock->wait_lock, flags);
919
920 debug_rt_mutex_free_waiter(&waiter);
921 }
922
923 /*
924 * Slow path to release a rt_mutex spin_lock style
925 */
926 static void noinline __sched
927 rt_spin_lock_slowunlock(struct rt_mutex *lock)
928 {
929 unsigned long flags;
930
931 spin_lock_irqsave(&lock->wait_lock, flags);
932
933 debug_rt_mutex_unlock(lock);
934
935 rt_mutex_deadlock_account_unlock(current);
936
937 if (!rt_mutex_has_waiters(lock)) {
938 lock->owner = NULL;
939 spin_unlock_irqrestore(&lock->wait_lock, flags);
940 return;
941 }
942
943 wakeup_next_waiter(lock, 1);
944
945 spin_unlock_irqrestore(&lock->wait_lock, flags);
946
947 /* Undo pi boosting.when necessary */
948 rt_mutex_adjust_prio(current);
949 }
950
/*
 * Acquire a spinlock_t: annotate the acquisition on lock->dep_map with
 * spin_acquire(), then let LOCK_CONTENDED_RT() do the locking with
 * rt_mutex_trylock and __rt_spin_lock as its operations.
 */
void __lockfunc rt_spin_lock(spinlock_t *lock)
{
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED_RT(lock, rt_mutex_trylock, __rt_spin_lock);
}
EXPORT_SYMBOL(rt_spin_lock);
957
/*
 * Acquire the rt_mutex directly: fast path first, with
 * rt_spin_lock_slowlock() as the fallback.
 */
void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
{
	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
}
EXPORT_SYMBOL(__rt_spin_lock);
963
#ifdef CONFIG_DEBUG_LOCK_ALLOC

/*
 * Same as rt_spin_lock(), except that @subclass is passed to
 * spin_acquire() instead of 0.
 */
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	LOCK_CONTENDED_RT(lock, rt_mutex_trylock, __rt_spin_lock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);

#endif
974
/*
 * Release a spinlock_t: annotate the release on lock->dep_map, then
 * unlock the underlying rt_mutex (fast path first, with
 * rt_spin_lock_slowunlock() as the fallback).
 */
void __lockfunc rt_spin_unlock(spinlock_t *lock)
{
	/* NOTE: we always pass in '1' for nested, for simplicity */
	spin_release(&lock->dep_map, 1, _RET_IP_);
	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(rt_spin_unlock);
982
/*
 * Release the rt_mutex directly: fast path first, with
 * rt_spin_lock_slowunlock() as the fallback.
 */
void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
{
	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(__rt_spin_unlock);
988
/*
 * Wait for the lock to get unlocked: instead of polling for an unlock
 * (like raw spinlocks do), we lock and unlock, to force the kernel to
 * schedule if there's contention:
 */
void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
{
	/* Taking the lock waits for the current holder, then release it again */
	spin_lock(lock);
	spin_unlock(lock);
}
EXPORT_SYMBOL(rt_spin_unlock_wait);
1000
1001 int __lockfunc rt_spin_trylock(spinlock_t *lock)
1002 {
1003 int ret = rt_mutex_trylock(&lock->lock);
1004
1005 if (ret)
1006 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1007
1008 return ret;
1009 }
1010 EXPORT_SYMBOL(rt_spin_trylock);
1011
1012 int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
1013 {
1014 int ret;
1015
1016 *flags = 0;
1017 ret = rt_mutex_trylock(&lock->lock);
1018 if (ret)
1019 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1020
1021 return ret;
1022 }
1023 EXPORT_SYMBOL(rt_spin_trylock_irqsave);
1024
1025 int _atomic_dec_and_spin_lock(spinlock_t *lock, atomic_t *atomic)
1026 {
1027 /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
1028 if (atomic_add_unless(atomic, -1, 1))
1029 return 0;
1030 rt_spin_lock(lock);
1031 if (atomic_dec_and_test(atomic))
1032 return 1;
1033 rt_spin_unlock(lock);
1034 return 0;
1035 }
1036 EXPORT_SYMBOL(_atomic_dec_and_spin_lock);
1037
/*
 * Initialize a spinlock_t.
 *
 * @lock:	the lock to initialize
 * @name:	name passed to lockdep_init_map() and __rt_mutex_init()
 * @key:	lock class key passed to lockdep_init_map()
 *
 * The lockdep setup is compiled in only with CONFIG_DEBUG_LOCK_ALLOC;
 * the embedded rt_mutex is always initialized.
 */
void
__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
	__rt_mutex_init(&lock->lock, name);
}
EXPORT_SYMBOL(__rt_spin_lock_init);
1051
/*
 * Defaults to NR_CPUS.
 * NOTE(review): presumably a limit for rw locks; it is not used in the
 * visible part of the file - confirm against its users.
 */
int rt_rwlock_limit = NR_CPUS;

/* Forward declarations; the definitions are not in this part of the file. */
static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags);
static inline void rt_reacquire_bkl(int saved_lock_depth);
1056
1057 static inline void
1058 rt_rwlock_set_owner(struct rw_mutex *rwm, struct task_struct *owner,
1059 unsigned long mask)
1060 {
1061 unsigned long val = (unsigned long)owner | mask;
1062
1063 rwm->owner = (struct task_struct *)val;
1064 }
1065
1066 static inline void init_rw_lists(struct rw_mutex *rwm)
1067 {
1068 if (unlikely(!rwm->readers.prev)) {
1069 init_lists(&rwm->mutex);
1070 INIT_LIST_HEAD(&rwm->readers);
1071 }
1072 }
1073
1074 /*
1075 * The fast paths of the rw locks do not set up owners to
1076 * the mutex. When blocking on an rwlock we must make sure
1077 * there exists an owner.
1078 */
1079 static void
1080 update_rw_mutex_owner(struct rw_mutex *rwm)
1081 {
1082 struct rt_mutex *mutex = &rwm->mutex;
1083 struct task_struct *mtxowner;
1084
1085 mtxowner = rt_mutex_owner(mutex);
1086 if (mtxowner)
1087 return;
1088
1089 mtxowner = rt_rwlock_owner(rwm);
1090 WARN_ON(!mtxowner);
1091 if (rt_rwlock_writer(rwm))
1092 WARN_ON(mtxowner == RT_RW_READER);
1093 else
1094 mtxowner = RT_RW_READER;
1095 rt_mutex_set_owner(mutex, mtxowner, 0);
1096 }
1097
#ifdef CONFIG_DEBUG_RT_MUTEXES
/*
 * Debug check: @rwm is about to be added to, or has just been removed
 * from, current's table of read-held locks. Warn (once) if an entry
 * for it is still present.
 */
static void rw_check_held(struct rw_mutex *rwm)
{
	int nr_held = current->reader_lock_count;
	int idx = 0;

	while (idx < nr_held) {
		WARN_ON_ONCE(current->owned_read_locks[idx].lock == rwm);
		idx++;
	}
}
#else
# define rw_check_held(rwm)	do { } while (0)
#endif
1114
1115 /*
1116 * The fast path does not add itself to the reader list to keep
1117 * from needing to grab the spinlock. We need to add the owner
1118 * itself. This may seem racy, but in practice, it is fine.
1119 * The link list is protected by mutex->wait_lock. But to find
1120 * the lock on the owner we need to read the owners reader counter.
1121 * That counter is modified only by the owner. We are OK with that
1122 * because to remove the lock that we are looking for, the owner
1123 * must first grab the mutex->wait_lock. The lock will not disappear
1124 * from the owner now, and we don't care if we see other locks
1125 * held or not held.
1126 */
1127
1128 static inline void
1129 rt_rwlock_update_owner(struct rw_mutex *rwm, struct task_struct *own)
1130 {
1131 struct reader_lock_struct *rls;
1132 int i;
1133
1134 if (!own || rt_rwlock_pending(rwm))
1135 return;
1136
1137 if (own == RT_RW_READER)
1138 return;
1139
1140 /*
1141 * We don't need to grab the pi_lock to look at the reader list
1142 * since we hold the rwm wait_lock. We only care about the pointer
1143 * to this lock, and we own the wait_lock, so that pointer
1144 * can't be changed.
1145 */
1146 for (i = own->reader_lock_count - 1; i >= 0; i--) {
1147 if (own->owned_read_locks[i].lock == rwm)
1148 break;
1149 }
1150 /* It is possible the owner didn't add it yet */
1151 if (i < 0)
1152 return;
1153
1154 rls = &own->owned_read_locks[i];
1155 /* It is also possible that the owner added it already */
1156 if (rls->list.prev && !list_empty(&rls->list))
1157 return;
1158
1159 list_add(&rls->list, &rwm->readers);
1160
1161 /* change to reader, so no one else updates too */
1162 rt_rwlock_set_owner(rwm, RT_RW_READER, RT_RWLOCK_CHECK);
1163 }
1164
1165 static int try_to_take_rw_read(struct rw_mutex *rwm, int mtx)
1166 {
1167 struct rt_mutex *mutex = &rwm->mutex;
1168 struct rt_mutex_waiter *waiter;
1169 struct reader_lock_struct *rls;
1170 struct task_struct *mtxowner;
1171 int owners;
1172 int reader_count, i;
1173 int incr = 1;
1174
1175 assert_spin_locked(&mutex->wait_lock);
1176
1177 /* mark the lock to force the owner to check on release */
1178 mark_rt_rwlock_check(rwm);
1179
1180 /* is the owner a writer? */
1181 if (unlikely(rt_rwlock_writer(rwm)))
1182 return 0;
1183
1184 /* check to see if we don't already own this lock */
1185 for (i = current->reader_lock_count - 1; i >= 0; i--) {
1186 if (current->owned_read_locks[i].lock == rwm) {
1187 rls = ¤t->owned_read_locks[i];
1188 /*
1189 * If this was taken via the fast path, then
1190 * it hasn't been added to the link list yet.
1191 */
1192 if (!rls->list.prev || list_empty(&rls->list))
1193 list_add(&rls->list, &rwm->readers);
1194 rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
1195 rls->count++;
1196 incr = 0;
1197 goto taken;
1198 }
1199 }
1200
1201 /* A writer is not the owner, but is a writer waiting */
1202 mtxowner = rt_mutex_owner(mutex);
1203
1204 /* if the owner released it before we marked it then take it */
1205 if (!mtxowner && !rt_rwlock_owner(rwm)) {
1206 /* Still unlock with the slow path (for PI handling) */
1207 rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
1208 goto taken;
1209 }
1210
1211 owners = atomic_read(&rwm->owners);
1212 rt_rwlock_update_owner(rwm, rt_rwlock_owner(rwm));
1213
1214 /* Check for rwlock limits */
1215 if (rt_rwlock_limit && owners >= rt_rwlock_limit)
1216 return 0;
1217
1218 if (mtxowner && mtxowner != RT_RW_READER) {
1219 int mode = mtx ? STEAL_NORMAL : STEAL_LATERAL;
1220
1221 if (!try_to_steal_lock(mutex, mode)) {
1222 /*
1223 * readers don't own the mutex, and rwm shows that a
1224 * writer doesn't have it either. If we enter this
1225 * condition, then we must be pending.
1226 */
1227 WARN_ON(!rt_mutex_owner_pending(mutex));
1228 /*
1229 * Even though we didn't steal the lock, if the owner
1230 * is a reader, and we are of higher priority than
1231 * any waiting writer, we might still be able to continue.
1232 */
1233 if (rt_rwlock_pending_writer(rwm))
1234 return 0;
1235 if (rt_mutex_has_waiters(mutex)) {
1236 waiter = rt_mutex_top_waiter(mutex);
1237 if (!lock_is_stealable(waiter->task, mode))
1238 return 0;
1239 /*
1240 * The pending reader has PI waiters,
1241 * but we are taking the lock.
1242 * Remove the waiters from the pending owner.
1243 */
1244 spin_lock(&mtxowner->pi_lock);
1245 plist_del(&waiter->pi_list_entry, &mtxowner->pi_waiters);
1246 spin_unlock(&mtxowner->pi_lock);
1247 }
1248 } else if (rt_mutex_has_waiters(mutex)) {
1249 /* Readers do things differently with respect to PI */
1250 waiter = rt_mutex_top_waiter(mutex);
1251 spin_lock(¤t->pi_lock);
1252 plist_del(&waiter->pi_list_entry, ¤t->pi_waiters);
1253 spin_unlock(¤t->pi_lock);
1254 }
1255 /* Readers never own the mutex */
1256 rt_mutex_set_owner(mutex, RT_RW_READER, 0);
1257 }
1258
1259 /* RT_RW_READER forces slow paths */
1260 rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
1261 taken:
1262 if (incr) {
1263 atomic_inc(&rwm->owners);
1264 rw_check_held(rwm);
1265 spin_lock(¤t->pi_lock);
1266 reader_count = current->reader_lock_count++;
1267 if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
1268 rls = ¤t->owned_read_locks[reader_count];
1269 rls->lock = rwm;
1270 rls->count = 1;
1271 WARN_ON(rls->list.prev && !list_empty(&rls->list));
1272 list_add(&rls->list, &rwm->readers);
1273 } else
1274 WARN_ON_ONCE(1);
1275 spin_unlock(¤t->pi_lock);
1276 }
1277 rt_mutex_deadlock_account_lock(mutex, current);
1278 atomic_inc(&rwm->count);
1279 return 1;
1280 }
1281
1282 static int
1283 try_to_take_rw_write(struct rw_mutex *rwm, int mtx)
1284 {
1285 struct rt_mutex *mutex = &rwm->mutex;
1286 struct task_struct *own;
1287
1288 /* mark the lock to force the owner to check on release */
1289 mark_rt_rwlock_check(rwm);
1290
1291 own = rt_rwlock_owner(rwm);
1292
1293 /* owners must be zero for writer */
1294 if (own) {
1295 rt_rwlock_update_owner(rwm, own);
1296
1297 if (!rt_rwlock_pending(rwm))
1298 return 0;
1299 }
1300
1301 /*
1302 * RT_RWLOCK_PENDING means that the lock is free, but there are
1303 * pending owners on the mutex
1304 */
1305 WARN_ON(own && !rt_mutex_owner_pending(mutex));
1306
1307 if (!do_try_to_take_rt_mutex(mutex, mtx ? STEAL_NORMAL : STEAL_LATERAL))
1308 return 0;
1309
1310 /*
1311 * We stole the lock. Add both WRITER and CHECK flags
1312 * since we must release the mutex.
1313 */
1314 rt_rwlock_set_owner(rwm, current, RT_RWLOCK_WRITER | RT_RWLOCK_CHECK);
1315
1316 return 1;
1317 }
1318
1319 static void
1320 rt_read_slowlock(struct rw_mutex *rwm, int mtx)
1321 {
1322 struct rt_mutex_waiter waiter;
1323 struct rt_mutex *mutex = &rwm->mutex;
1324 int saved_lock_depth = -1;
1325 unsigned long saved_state = -1, state, flags;
1326
1327 spin_lock_irqsave(&mutex->wait_lock, flags);
1328 init_rw_lists(rwm);
1329
1330 if (try_to_take_rw_read(rwm, mtx)) {
1331 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1332 return;
1333 }
1334 update_rw_mutex_owner(rwm);
1335
1336 /* Owner is a writer (or a blocked writer). Block on the lock */
1337
1338 debug_rt_mutex_init_waiter(&waiter);
1339 waiter.task = NULL;
1340 waiter.write_lock = 0;
1341
1342 if (mtx) {
1343 /*
1344 * We drop the BKL here before we go into the wait loop to avoid a
1345 * possible deadlock in the scheduler.
1346 */
1347 if (unlikely(current->lock_depth >= 0))
1348 saved_lock_depth = rt_release_bkl(mutex, flags);
1349 set_current_state(TASK_UNINTERRUPTIBLE);
1350 } else {
1351 /* Spin lock must preserve BKL */
1352 saved_state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
1353 saved_lock_depth = current->lock_depth;
1354 }
1355
1356 for (;;) {
1357 unsigned long saved_flags;
1358
1359 /* Try to acquire the lock: */
1360 if (try_to_take_rw_read(rwm, mtx))
1361 break;
1362 update_rw_mutex_owner(rwm);
1363
1364 /*
1365 * waiter.task is NULL the first time we come here and
1366 * when we have been woken up by the previous owner
1367 * but the lock got stolen by a higher prio task.
1368 */
1369 if (!waiter.task) {
1370 task_blocks_on_rt_mutex(mutex, &waiter, 0, flags);
1371 /* Wakeup during boost ? */
1372 if (unlikely(!waiter.task))
1373 continue;
1374 }
1375 saved_flags = current->flags & PF_NOSCHED;
1376 current->flags &= ~PF_NOSCHED;
1377 if (!mtx)
1378 current->lock_depth = -1;
1379
1380 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1381
1382 debug_rt_mutex_print_deadlock(&waiter);
1383
1384 if (!mtx || waiter.task)
1385 schedule_rt_mutex(mutex);
1386
1387 spin_lock_irqsave(&mutex->wait_lock, flags);
1388
1389 current->flags |= saved_flags;
1390 if (mtx)
1391 set_current_state(TASK_UNINTERRUPTIBLE);
1392 else {
1393 current->lock_depth = saved_lock_depth;
1394 state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
1395 if (unlikely(state == TASK_RUNNING))
1396 saved_state = TASK_RUNNING;
1397 }
1398 }
1399
1400 if (mtx)
1401 set_current_state(TASK_RUNNING);
1402 else {
1403 state = xchg(¤t->state, saved_state);
1404 if (unlikely(state == TASK_RUNNING))
1405 current->state = TASK_RUNNING;
1406 }
1407
1408 if (unlikely(waiter.task))
1409 remove_waiter(mutex, &waiter, flags);
1410
1411 WARN_ON(rt_mutex_owner(mutex) &&
1412 rt_mutex_owner(mutex) != current &&
1413 rt_mutex_owner(mutex) != RT_RW_READER &&
1414 !rt_mutex_owner_pending(mutex));
1415
1416 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1417
1418 /* Must we reaquire the BKL? */
1419 if (mtx && unlikely(saved_lock_depth >= 0))
1420 rt_reacquire_bkl(saved_lock_depth);
1421
1422 debug_rt_mutex_free_waiter(&waiter);
1423 }
1424
1425 static inline int
1426 __rt_read_fasttrylock(struct rw_mutex *rwm)
1427 {
1428 retry:
1429 if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
1430 int reader_count;
1431 unsigned long flags;
1432
1433 rt_mutex_deadlock_account_lock(&rwm->mutex, current);
1434 atomic_inc(&rwm->count);
1435 smp_mb();
1436 /*
1437 * It is possible that the owner was zeroed
1438 * before we incremented count. If owner is not
1439 * current, then retry again
1440 */
1441 if (unlikely(rwm->owner != current)) {
1442 atomic_dec(&rwm->count);
1443 goto retry;
1444 }
1445
1446 atomic_inc(&rwm->owners);
1447 rw_check_held(rwm);
1448 local_irq_save(flags);
1449 spin_lock(¤t->pi_lock);
1450 reader_count = current->reader_lock_count++;
1451 if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
1452 current->owned_read_locks[reader_count].lock = rwm;
1453 current->owned_read_locks[reader_count].count = 1;
1454 } else
1455 WARN_ON_ONCE(1);
1456 spin_unlock(¤t->pi_lock);
1457 /*
1458 * If this task is no longer the sole owner of the lock
1459 * or someone is blocking, then we need to add the task
1460 * to the lock.
1461 */
1462 if (unlikely(rwm->owner != current)) {
1463 struct rt_mutex *mutex = &rwm->mutex;
1464 struct reader_lock_struct *rls;
1465
1466 spin_lock(&mutex->wait_lock);
1467 rls = ¤t->owned_read_locks[reader_count];
1468 if (!rls->list.prev || list_empty(&rls->list))
1469 list_add(&rls->list, &rwm->readers);
1470 spin_unlock(&mutex->wait_lock);
1471 }
1472 local_irq_restore(flags);
1473 return 1;
1474 }
1475 return 0;
1476 }
1477
/*
 * Take @rwm for read: try the lockless fast path first and fall back
 * to @slowfn(rwm, mtx) when it fails.
 */
static inline void
rt_read_fastlock(struct rw_mutex *rwm,
		 void (*slowfn)(struct rw_mutex *rwm, int mtx),
		 int mtx)
{
	if (likely(__rt_read_fasttrylock(rwm)))
		return;

	slowfn(rwm, mtx);
}
1486
1487 void rt_mutex_down_read(struct rw_mutex *rwm)
1488 {
1489 rt_read_fastlock(rwm, rt_read_slowlock, 1);
1490 }
1491
1492 void rt_rwlock_read_lock(struct rw_mutex *rwm)
1493 {
1494 rt_read_fastlock(rwm, rt_read_slowlock, 0);
1495 }
1496
1497 static inline int
1498 rt_read_slowtrylock(struct rw_mutex *rwm, int mtx)
1499 {
1500 struct rt_mutex *mutex = &rwm->mutex;
1501 unsigned long flags;
1502 int ret = 0;
1503
1504 spin_lock_irqsave(&mutex->wait_lock, flags);
1505 init_rw_lists(rwm);
1506
1507 if (try_to_take_rw_read(rwm, mtx))
1508 ret = 1;
1509
1510 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1511
1512 return ret;
1513 }
1514
/*
 * Read trylock: lockless fast path first, then @slowfn(rwm, mtx).
 * Returns 1 on success, otherwise whatever @slowfn returns.
 */
static inline int
rt_read_fasttrylock(struct rw_mutex *rwm,
		    int (*slowfn)(struct rw_mutex *rwm, int mtx), int mtx)
{
	if (unlikely(!__rt_read_fasttrylock(rwm)))
		return slowfn(rwm, mtx);

	return 1;
}
1524
1525 int __sched rt_mutex_down_read_trylock(struct rw_mutex *rwm)
1526 {
1527 return rt_read_fasttrylock(rwm, rt_read_slowtrylock, 1);
1528 }
1529
1530 static void
1531 rt_write_slowlock(struct rw_mutex *rwm, int mtx)
1532 {
1533 struct rt_mutex *mutex = &rwm->mutex;
1534 struct rt_mutex_waiter waiter;
1535 int saved_lock_depth = -1;
1536 unsigned long flags, saved_state = -1, state;
1537
1538 debug_rt_mutex_init_waiter(&waiter);
1539 waiter.task = NULL;
1540
1541 /* we do PI different for writers that are blocked */
1542 waiter.write_lock = 1;
1543
1544 spin_lock_irqsave(&mutex->wait_lock, flags);
1545 init_rw_lists(rwm);
1546
1547 if (try_to_take_rw_write(rwm, mtx)) {
1548 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1549 return;
1550 }
1551 update_rw_mutex_owner(rwm);
1552
1553 if (mtx) {
1554 /*
1555 * We drop the BKL here before we go into the wait loop to avoid a
1556 * possible deadlock in the scheduler.
1557 */
1558 if (unlikely(current->lock_depth >= 0))
1559 saved_lock_depth = rt_release_bkl(mutex, flags);
1560 set_current_state(TASK_UNINTERRUPTIBLE);
1561 } else {
1562 /* Spin locks must preserve the BKL */
1563 saved_lock_depth = current->lock_depth;
1564 saved_state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
1565 }
1566
1567 for (;;) {
1568 unsigned long saved_flags;
1569
1570 /* Try to acquire the lock: */
1571 if (try_to_take_rw_write(rwm, mtx))
1572 break;
1573 update_rw_mutex_owner(rwm);
1574
1575 /*
1576 * waiter.task is NULL the first time we come here and
1577 * when we have been woken up by the previous owner
1578 * but the lock got stolen by a higher prio task.
1579 */
1580 if (!waiter.task) {
1581 task_blocks_on_rt_mutex(mutex, &waiter, 0, flags);
1582 /* Wakeup during boost ? */
1583 if (unlikely(!waiter.task))
1584 continue;
1585 }
1586 saved_flags = current->flags & PF_NOSCHED;
1587 current->flags &= ~PF_NOSCHED;
1588 if (!mtx)
1589 current->lock_depth = -1;
1590
1591 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1592
1593 debug_rt_mutex_print_deadlock(&waiter);
1594
1595 if (!mtx || waiter.task)
1596 schedule_rt_mutex(mutex);
1597
1598 spin_lock_irqsave(&mutex->wait_lock, flags);
1599
1600 current->flags |= saved_flags;
1601 if (mtx)
1602 set_current_state(TASK_UNINTERRUPTIBLE);
1603 else {
1604 current->lock_depth = saved_lock_depth;
1605 state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
1606 if (unlikely(state == TASK_RUNNING))
1607 saved_state = TASK_RUNNING;
1608 }
1609 }
1610
1611 if (mtx)
1612 set_current_state(TASK_RUNNING);
1613 else {
1614 state = xchg(¤t->state, saved_state);
1615 if (unlikely(state == TASK_RUNNING))
1616 current->state = TASK_RUNNING;
1617 }
1618
1619 if (unlikely(waiter.task))
1620 remove_waiter(mutex, &waiter, flags);
1621
1622 /* check on unlock if we have any waiters. */
1623 if (rt_mutex_has_waiters(mutex))
1624 mark_rt_rwlock_check(rwm);
1625
1626 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1627
1628 /* Must we reaquire the BKL? */
1629 if (mtx && unlikely(saved_lock_depth >= 0))
1630 rt_reacquire_bkl(saved_lock_depth);
1631
1632 debug_rt_mutex_free_waiter(&waiter);
1633
1634 }
1635
1636 static inline void
1637 rt_write_fastlock(struct rw_mutex *rwm,
1638 void (*slowfn)(struct rw_mutex *rwm, int mtx), int mtx)
1639 {
1640 struct task_struct *val = (void *)((unsigned long)current |
1641 RT_RWLOCK_WRITER);
1642
1643 if (likely(rt_rwlock_cmpxchg(rwm, NULL, val)))
1644 rt_mutex_deadlock_account_lock(&rwm->mutex, current);
1645 else
1646 slowfn(rwm, mtx);
1647 }
1648
1649 void rt_mutex_down_write(struct rw_mutex *rwm)
1650 {
1651 rt_write_fastlock(rwm, rt_write_slowlock, 1);
1652 }
1653
1654 void rt_rwlock_write_lock(struct rw_mutex *rwm)
1655 {
1656 rt_write_fastlock(rwm, rt_write_slowlock, 0);
1657 }
1658
1659 static int
1660 rt_write_slowtrylock(struct rw_mutex *rwm, int mtx)
1661 {
1662 struct rt_mutex *mutex = &rwm->mutex;
1663 unsigned long flags;
1664 int ret = 0;
1665
1666 spin_lock_irqsave(&mutex->wait_lock, flags);
1667 init_rw_lists(rwm);
1668
1669 if (try_to_take_rw_write(rwm, mtx))
1670 ret = 1;
1671
1672 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1673
1674 return ret;
1675 }
1676
1677 static inline int
1678 rt_write_fasttrylock(struct rw_mutex *rwm,
1679 int (*slowfn)(struct rw_mutex *rwm, int mtx), int mtx)
1680 {
1681 struct task_struct *val = (void *)((unsigned long)current |
1682 RT_RWLOCK_WRITER);
1683
1684 if (likely(rt_rwlock_cmpxchg(rwm, NULL, val))) {
1685 rt_mutex_deadlock_account_lock(&rwm->mutex, current);
1686 return 1;
1687 } else
1688 return slowfn(rwm, mtx);
1689 }
1690
1691 int rt_mutex_down_write_trylock(struct rw_mutex *rwm)
1692 {
1693 return rt_write_fasttrylock(rwm, rt_write_slowtrylock, 1);
1694 }
1695
1696 static void noinline __sched
1697 rt_read_slowunlock(struct rw_mutex *rwm, int mtx)
1698 {
1699 struct rt_mutex *mutex = &rwm->mutex;
1700 struct rt_mutex_waiter *waiter;
1701 struct task_struct *pendowner;
1702 struct reader_lock_struct *rls;
1703 unsigned long flags;
1704 unsigned int reader_count;
1705 int savestate = !mtx;
1706 int i;
1707
1708 spin_lock_irqsave(&mutex->wait_lock, flags);
1709
1710 rt_mutex_deadlock_account_unlock(current);
1711
1712 /*
1713 * To prevent multiple readers from zeroing out the owner
1714 * when the count goes to zero and then have another task
1715 * grab the task. We mark the lock. This makes all tasks
1716 * go to the slow path. Then we can check the owner without
1717 * worry that it changed.
1718 */
1719 mark_rt_rwlock_check(rwm);
1720
1721 for (i = current->reader_lock_count - 1; i >= 0; i--) {
1722 if (current->owned_read_locks[i].lock == rwm) {
1723 spin_lock(¤t->pi_lock);
1724 current->owned_read_locks[i].count--;
1725 if (!current->owned_read_locks[i].count) {
1726 current->reader_lock_count--;
1727 WARN_ON_ONCE(i != current->reader_lock_count);
1728 atomic_dec(&rwm->owners);
1729 rls = ¤t->owned_read_locks[i];
1730 WARN_ON(!rls->list.prev || list_empty(&rls->list));
1731 list_del_init(&rls->list);
1732 rls->lock = NULL;
1733 rw_check_held(rwm);
1734 }
1735 spin_unlock(¤t->pi_lock);
1736 break;
1737 }
1738 }
1739 WARN_ON_ONCE(i < 0);
1740
1741 /*
1742 * If the last two (or more) readers unlocked at the same
1743 * time, the owner could be cleared since the count went to
1744 * zero. If this has happened, the rwm owner will not
1745 * be set to current or readers. This means that another reader
1746 * already reset the lock, so there is nothing left to do.
1747 */
1748 if (unlikely(rt_rwlock_owner(rwm) != current &&
1749 rt_rwlock_owner(rwm) != RT_RW_READER)) {
1750 /* Update the owner if necessary */
1751 rt_rwlock_update_owner(rwm, rt_rwlock_owner(rwm));
1752 goto out;
1753 }
1754
1755 /*
1756 * If there are more readers and we are under the limit
1757 * let the last reader do the wakeups.
1758 */
1759 reader_count = atomic_read(&rwm->count);
1760 if (reader_count &&
1761 (!rt_rwlock_limit || atomic_read(&rwm->owners) >= rt_rwlock_limit))
1762 goto out;
1763
1764 /* If no one is blocked, then clear all ownership */
1765 if (!rt_mutex_has_waiters(mutex)) {
1766 rwm->prio = MAX_PRIO;
1767 /*
1768 * If count is not zero, we are under the limit with
1769 * no other readers.
1770 */
1771 if (reader_count)
1772 goto out;
1773
1774 /* We could still have a pending reader waiting */
1775 if (rt_mutex_owner_pending(mutex)) {
1776 /* set the rwm back to pending */
1777 rwm->owner = RT_RWLOCK_PENDING_READ;
1778 } else {
1779 rwm->owner = NULL;
1780 mutex->owner = NULL;
1781 }
1782 goto out;
1783 }
1784
1785 /*
1786 * If the next waiter is a reader, this can be because of
1787 * two things. One is that we hit the reader limit, or
1788 * Two, there is a pending writer.
1789 * We still only wake up one reader at a time (even if
1790 * we could wake up more). This is because we dont
1791 * have any idea if a writer is pending.
1792 */
1793 waiter = rt_mutex_top_waiter(mutex);
1794 if (waiter->write_lock) {
1795 /* only wake up if there are no readers */
1796 if (reader_count)
1797 goto out;
1798 rwm->owner = RT_RWLOCK_PENDING_WRITE;
1799 } else {
1800 /*
1801 * It is also possible that the reader limit decreased.
1802 * If the limit did decrease, we may not be able to
1803 * wake up the reader if we are currently above the limit.
1804 */
1805 if (rt_rwlock_limit &&
1806 unlikely(atomic_read(&rwm->owners) >= rt_rwlock_limit))
1807 goto out;
1808 if (!reader_count)
1809 rwm->owner = RT_RWLOCK_PENDING_READ;
1810 }
1811
1812 pendowner = waiter->task;
1813 wakeup_next_waiter(mutex, savestate);
1814
1815 /*
1816 * If we woke up a reader but the lock is already held by readers
1817 * we need to set the mutex owner to RT_RW_READER, since the
1818 * wakeup_next_waiter set it to the pending reader.
1819 */
1820 if (reader_count) {
1821 WARN_ON(waiter->write_lock);
1822 rt_mutex_set_owner(mutex, RT_RW_READER, 0);
1823 }
1824
1825 if (rt_mutex_has_waiters(mutex)) {
1826 waiter = rt_mutex_top_waiter(mutex);
1827 rwm->prio = waiter->task->prio;
1828 /*
1829 * If readers still own this lock, then we need
1830 * to update the pi_list too. Readers have a separate
1831 * path in the PI chain.
1832 */
1833 if (reader_count) {
1834 spin_lock(&pendowner->pi_lock);
1835 plist_del(&waiter->pi_list_entry,
1836 &pendowner->pi_waiters);
1837 spin_unlock(&pendowner->pi_lock);
1838 }
1839 } else
1840 rwm->prio = MAX_PRIO;
1841
1842 out:
1843 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1844
1845 /* Undo pi boosting.when necessary */
1846 rt_mutex_adjust_prio(current);
1847 }
1848
1849 static inline void
1850 rt_read_fastunlock(struct rw_mutex *rwm,
1851 void (*slowfn)(struct rw_mutex *rwm, int mtx), int mtx)
1852 {
1853 WARN_ON(!atomic_read(&rwm->count));
1854 WARN_ON(!atomic_read(&rwm->owners));
1855 WARN_ON(!rwm->owner);
1856 smp_mb();
1857 atomic_dec(&rwm->count);
1858 if (likely(rt_rwlock_cmpxchg(rwm, current, NULL))) {
1859 struct reader_lock_struct *rls;
1860 unsigned long flags;
1861 int reader_count;
1862 int owners;
1863
1864 spin_lock_irqsave(¤t->pi_lock, flags);
1865 reader_count = --current->reader_lock_count;
1866 spin_unlock_irqrestore(¤t->pi_lock, flags);
1867
1868 rt_mutex_deadlock_account_unlock(current);
1869 if (unlikely(reader_count < 0)) {
1870 reader_count = 0;
1871 WARN_ON_ONCE(1);
1872 }
1873 owners = atomic_dec_return(&rwm->owners);
1874 if (unlikely(owners < 0)) {
1875 atomic_set(&rwm->owners, 0);
1876 WARN_ON_ONCE(1);
1877 }
1878 rls = ¤t->owned_read_locks[reader_count];
1879 WARN_ON_ONCE(rls->lock != rwm);
1880 WARN_ON(rls->list.prev && !list_empty(&rls->list));
1881 WARN_ON(rls->count != 1);
1882 rls->lock = NULL;
1883 rw_check_held(rwm);
1884 } else
1885 slowfn(rwm, mtx);
1886 }
1887
1888 void rt_mutex_up_read(struct rw_mutex *rwm)
1889 {
1890 rt_read_fastunlock(rwm, rt_read_slowunlock, 1);
1891 }
1892
1893 void rt_rwlock_read_unlock(struct rw_mutex *rwm)
1894 {
1895 rt_read_fastunlock(rwm, rt_read_slowunlock, 0);
1896 }
1897
1898 static void noinline __sched
1899 rt_write_slowunlock(struct rw_mutex *rwm, int mtx)
1900 {
1901 struct rt_mutex *mutex = &rwm->mutex;
1902 struct rt_mutex_waiter *waiter;
1903 struct task_struct *pendowner;
1904 int savestate = !mtx;
1905 unsigned long flags;
1906
1907 spin_lock_irqsave(&mutex->wait_lock, flags);
1908
1909 rt_mutex_deadlock_account_unlock(current);
1910
1911 if (!rt_mutex_has_waiters(mutex)) {
1912 rwm->owner = NULL;
1913 mutex->owner = NULL;
1914 spin_unlock_irqrestore(&mutex->wait_lock, flags);
1915 return;
1916 }
1917
1918 debug_rt_mutex_unlock(mutex);
1919
1920 /*
1921 * This is where it gets a bit tricky.
1922 * We can have both readers and writers waiting below us.
1923 * They are ordered by priority. For each reader we wake
1924 * up, we check to see if there's another reader waiting.
1925 * If that is the case, we continue to wake up the readers
1926 * until we hit a writer. Once we hit a writer, then we
1927 * stop (and don't wake it up).
1928 *
1929 * If the next waiter is a writer, than we just wake up
1930 * the writer and we are done.
1931 */
1932
1933 waiter = rt_mutex_top_waiter(mutex);
1934 pendowner = waiter->task;
1935 wakeup_next_waiter(mutex, savestate);
1936
1937 /* another writer is next? */
1938 if (waiter->write_lock) {
1939 rwm->owner = RT_RWLOCK_PENDING_WRITE;
1940 goto out;
1941 }
1942
1943 rwm->owner = RT_RWLOCK_PENDING_READ;
1944
1945 if (!rt_mutex_has_waiters(mutex))
1946 goto out;
1947
1948 /*
1949 * Wake up all readers.
1950 * This gets a bit more complex. More than one reader can't
1951 * own the mutex. We give it to the first (highest prio)
1952 * reader, and then wake up the rest of the readers until
1953 * we wake up all readers or come to a writer. The woken
1954 * up readers that don't own the lock will try to take it
1955 * when they schedule. Doing this lets a high prio writer
1956 * come along and steal the lock.
1957 */
1958 waiter = rt_mutex_top_waiter(mutex);
1959 while (waiter && !waiter->write_lock) {
1960 struct task_struct *reader = waiter->task;
1961
1962 spin_lock(&pendowner->pi_lock);
1963 plist_del(&waiter->list_entry, &mutex->wait_list);
1964
1965 /* nop if not on a list */
1966 plist_del(&waiter->pi_list_entry, &pendowner->pi_waiters);
1967 spin_unlock(&pendowner->pi_lock);
1968
1969 spin_lock(&reader->pi_lock);
1970 waiter->task = NULL;
1971 reader->pi_blocked_on = NULL;
1972 spin_unlock(&reader->pi_lock);
1973
1974 if (savestate)
1975 wake_up_process_mutex(reader);
1976 else
1977 wake_up_process(reader);
1978
1979 if (rt_mutex_has_waiters(mutex))
1980 waiter = rt_mutex_top_waiter(mutex);
1981 else
1982 waiter = NULL;
1983 }
1984
1985 /* If a writer is still pending, then update its plist. */
1986 if (rt_mutex_has_waiters(mutex)) {
1987 struct rt_mutex_waiter *next;
1988
1989 next = rt_mutex_top_waiter(mutex);
1990
1991 spin_lock(&pendowner->pi_lock);
1992 /* delete incase we didn't go through the loop */
1993 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
1994
1995 /* This could also be a reader (if reader_limit is set) */
1996 if (next->write_lock)
1997 /* add back in as top waiter */
1998 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
1999 spin_unlock(&pendowner->pi_lock);
2000
2001 rwm->prio = next->task->prio;
2002 } else
2003 rwm->prio = MAX_PRIO;
2004
2005 out:
2006
2007 spin_unlock_irqrestore(&mutex->wait_lock, flags);
2008
2009 /* Undo pi boosting.when necessary */
2010 rt_mutex_adjust_prio(current);
2011 }
2012
2013 static inline void
2014 rt_write_fastunlock(struct rw_mutex *rwm,
2015 void (*slowfn)(struct rw_mutex *rwm, int mtx), int mtx)
2016 {
2017 struct task_struct *val = (void *)((unsigned long)current |
2018 RT_RWLOCK_WRITER);
2019
2020 WARN_ON(rt_rwlock_owner(rwm) != current);
2021 if (likely(rt_rwlock_cmpxchg(rwm, (struct task_struct *)val, NULL)))
2022 rt_mutex_deadlock_account_unlock(current);
2023 else
2024 slowfn(rwm, mtx);
2025 }
2026
2027 void rt_mutex_up_write(struct rw_mutex *rwm)
2028 {
2029 rt_write_fastunlock(rwm, rt_write_slowunlock, 1);
2030 }
2031
2032 void rt_rwlock_write_unlock(struct rw_mutex *rwm)
2033 {
2034 rt_write_fastunlock(rwm, rt_write_slowunlock, 0);
2035 }
2036
2037 /*
2038 * We own the lock for write, and we want to convert it to a read,
2039 * so we simply take the lock as read, and wake up all other readers.
2040 */
2041 void __sched
2042 rt_mutex_downgrade_write(struct rw_mutex *rwm)
2043 {
2044 struct rt_mutex *mutex = &rwm->mutex;
2045 struct reader_lock_struct *rls;
2046 struct rt_mutex_waiter *waiter;
2047 unsigned long flags;
2048 int reader_count;
2049
2050 spin_lock_irqsave(&mutex->wait_lock, flags);
2051 init_rw_lists(rwm);
2052
2053 /* we have the lock and are sole owner, then update the accounting */
2054 atomic_inc(&rwm->count);
2055 atomic_inc(&rwm->owners);
2056 rw_check_held(rwm);
2057
2058 spin_lock(¤t->pi_lock);
2059 reader_count = current->reader_lock_count++;
2060 rls = ¤t->owned_read_locks[reader_count];
2061 if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
2062 rls->lock = rwm;
2063 rls->count = 1;
2064 } else
2065 WARN_ON_ONCE(1);
2066 spin_unlock(¤t->pi_lock);
2067
2068 if (!rt_mutex_has_waiters(mutex)) {
2069 /* We are sole owner, we are done */
2070 rwm->owner = current;
2071 rwm->prio = MAX_PRIO;
2072 mutex->owner = NULL;
2073 spin_unlock_irqrestore(&mutex->wait_lock, flags);
2074 return;
2075 }
2076
2077 /* Set us up for multiple readers or conflicts */
2078
2079 list_add(&rls->list, &rwm->readers);
2080 rwm->owner = RT_RW_READER;
2081
2082 /*
2083 * This is like the write unlock, but we already own the
2084 * reader. We still want to wake up other readers that are
2085 * waiting, until we hit the reader limit, or a writer.
2086 */
2087
2088 waiter = rt_mutex_top_waiter(mutex);
2089 while (waiter && !waiter->write_lock) {
2090 struct task_struct *reader = waiter->task;
2091
2092 spin_lock(¤t->pi_lock);
2093 plist_del(&waiter->list_entry, &mutex->wait_list);
2094
2095 /* nop if not on a list */
2096 plist_del(&waiter->pi_list_entry, ¤t->pi_waiters);
2097 spin_unlock(¤t->pi_lock);
2098
2099 spin_lock(&reader->pi_lock);
2100 waiter->task = NULL;
2101 reader->pi_blocked_on = NULL;
2102 spin_unlock(&reader->pi_lock);
2103
2104 /* downgrade is only for mutexes */
2105 wake_up_process(reader);
2106
2107 if (rt_mutex_has_waiters(mutex))
2108 waiter = rt_mutex_top_waiter(mutex);
2109 else
2110 waiter = NULL;
2111 }
2112
2113 /* If a writer is still pending, then update its plist. */
2114 if (rt_mutex_has_waiters(mutex)) {
2115 struct rt_mutex_waiter *next;
2116
2117 next = rt_mutex_top_waiter(mutex);
2118
2119 /* setup this mutex prio for read */
2120 rwm->prio = next->task->prio;
2121
2122 spin_lock(¤t->pi_lock);
2123 /* delete incase we didn't go through the loop */
2124 plist_del(&next->pi_list_entry, ¤t->pi_waiters);
2125 spin_unlock(¤t->pi_lock);
2126 /* No need to add back since readers don't have PI waiters */
2127 } else
2128 rwm->prio = MAX_PRIO;
2129
2130 rt_mutex_set_owner(mutex, RT_RW_READER, 0);
2131
2132 spin_unlock_irqrestore(&mutex->wait_lock, flags);
2133
2134 /*
2135 * Undo pi boosting when necessary.
2136 * If one of the awoken readers boosted us, we don't want to keep
2137 * that priority.
2138 */
2139 rt_mutex_adjust_prio(current);
2140 }
2141
2142 void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name)
2143 {
2144 struct rt_mutex *mutex = &rwm->mutex;
2145
2146 rwm->owner = NULL;
2147 atomic_set(&rwm->count, 0);
2148 atomic_set(&rwm->owners, 0);
2149 rwm->prio = MAX_PRIO;
2150 INIT_LIST_HEAD(&rwm->readers);
2151
2152 __rt_mutex_init(mutex, name);
2153 }
2154
2155 static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
2156 {
2157 struct reader_lock_struct *rls;
2158 struct rw_mutex *rwm;
2159 int lock_prio;
2160 int i;
2161
2162 for (i = 0; i < task->reader_lock_count; i++) {
2163 rls = &task->owned_read_locks[i];
2164 rwm = rls->lock;
2165 if (rwm) {
2166 lock_prio = rwm->prio;
2167 if (prio > lock_prio)
2168 prio = lock_prio;
2169 }
2170 }
2171
2172 return prio;
2173 }
2174
2175 static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
2176 struct rt_mutex_waiter *orig_waiter,
2177 struct task_struct *top_task,
2178 struct rt_mutex *lock,
2179 int recursion_depth)
2180 {
2181 struct reader_lock_struct *rls;
2182 struct rt_mutex_waiter *waiter;
2183 struct task_struct *task;
2184 struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex);
2185
2186 if (rt_mutex_has_waiters(lock)) {
2187 waiter = rt_mutex_top_waiter(lock);
2188 /*
2189 * Do we need to grab the task->pi_lock?
2190 * Really, we are only reading it. If it
2191 * changes, then that should follow this chain
2192 * too.
2193 */
2194 rwm->prio = waiter->task->prio;
2195 } else
2196 rwm->prio = MAX_PRIO;
2197
2198 if (recursion_depth >= MAX_RWLOCK_DEPTH) {
2199 WARN_ON(1);
2200 return 1;
2201 }
2202
2203 list_for_each_entry(rls, &rwm->readers, list) {
2204 task = rls->task;
2205 get_task_struct(task);
2206 /*
2207 * rt_mutex_adjust_prio_chain will do
2208 * the put_task_struct
2209 */
2210 rt_mutex_adjust_prio_chain(task, 0, orig_lock,
2211 orig_waiter, top_task,
2212 recursion_depth+1);
2213 }
2214
2215 return 0;
2216 }
2217 #else
/*
 * Stub variant from the #else branch of the conditional closed by
 * "#endif CONFIG_PREEMPT_RT": there is nothing to adjust, so all
 * arguments are ignored and 0 is returned.
 */
static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
				   struct rt_mutex_waiter *orig_waiter,
				   struct task_struct *top_task,
				   struct rt_mutex *lock,
				   int recursion_depth)
{
	return 0;
}
2226
/*
 * Stub variant from the #else branch of the conditional closed by
 * "#endif CONFIG_PREEMPT_RT": @task is not inspected and @prio is
 * handed back unchanged.
 */
static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
{
	return prio;
}
2231 #endif /* CONFIG_PREEMPT_RT */
2232
2233 static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags)
2234 {
2235 int saved_lock_depth = current->lock_depth;
2236
2237 current->lock_depth = -1;
2238 /*
2239 * try_to_take_lock set the waiters, make sure it's
2240 * still correct.
2241 */
2242 fixup_rt_mutex_waiters(lock);
2243 spin_unlock_irqrestore(&lock->wait_lock, flags);
2244
2245 up(&kernel_sem);
2246
2247 spin_lock_irq(&lock->wait_lock);
2248
2249 return saved_lock_depth;
2250 }
2251
2252 static inline void rt_reacquire_bkl(int saved_lock_depth)
2253 {
2254 down(&kernel_sem);
2255 current->lock_depth = saved_lock_depth;
2256 }
2257
2258 /*
2259 * Slow path lock function:
2260 */
2261 static int __sched
2262 rt_mutex_slowlock(struct rt_mutex *lock, int state,
2263 struct hrtimer_sleeper *timeout,
2264 int detect_deadlock)
2265 {
2266 int ret = 0, saved_lock_depth = -1;
2267 struct rt_mutex_waiter waiter;
2268 unsigned long flags;
2269
2270 debug_rt_mutex_init_waiter(&waiter);
2271 waiter.task = NULL;
2272 waiter.write_lock = 0;
2273
2274 spin_lock_irqsave(&lock->wait_lock, flags);
2275 init_lists(lock);
2276
2277 /* Try to acquire the lock again: */
2278 if (try_to_take_rt_mutex(lock)) {
2279 spin_unlock_irqrestore(&lock->wait_lock, flags);
2280 return 0;
2281 }
2282
2283 /*
2284 * We drop the BKL here before we go into the wait loop to avoid a
2285 * possible deadlock in the scheduler.
2286 */
2287 if (unlikely(current->lock_depth >= 0))
2288 saved_lock_depth = rt_release_bkl(lock, flags);
2289
2290 set_current_state(state);
2291
2292 /* Setup the timer, when timeout != NULL */
2293 if (unlikely(timeout)) {
2294 hrtimer_start(&timeout->timer, timeout->timer.expires,
2295 HRTIMER_MODE_ABS);
2296 if (!hrtimer_active(&timeout->timer))
2297 timeout->task = NULL;
2298 }
2299
2300 for (;;) {
2301 unsigned long saved_flags;
2302
2303 /* Try to acquire the lock: */
2304 if (try_to_take_rt_mutex(lock))
2305 break;
2306
2307 /*
2308 * TASK_INTERRUPTIBLE checks for signals and
2309 * timeout. Ignored otherwise.
2310 */
2311 if (unlikely(state == TASK_INTERRUPTIBLE)) {
2312 /* Signal pending? */
2313 if (signal_pending(current))
2314 ret = -EINTR;
2315 if (timeout && !timeout->task)
2316 ret = -ETIMEDOUT;
2317 if (ret)
2318 break;
2319 }
2320
2321 /*
2322 * waiter.task is NULL the first time we come here and
2323 * when we have been woken up by the previous owner
2324 * but the lock got stolen by a higher prio task.
2325 */
2326 if (!waiter.task) {
2327 ret = task_blocks_on_rt_mutex(lock, &waiter,
2328 detect_deadlock, flags);
2329 /*
2330 * If we got woken up by the owner then start loop
2331 * all over without going into schedule to try
2332 * to get the lock now:
2333 */
2334 if (unlikely(!waiter.task)) {
2335 /*
2336 * Reset the return value. We might
2337 * have returned with -EDEADLK and the
2338 * owner released the lock while we
2339 * were walking the pi chain.
2340 */
2341 ret = 0;
2342 continue;
2343 }
2344 if (unlikely(ret))
2345 break;
2346 }
2347 saved_flags = current->flags & PF_NOSCHED;
2348 current->flags &= ~PF_NOSCHED;
2349
2350 spin_unlock_irq(&lock->wait_lock);
2351
2352 debug_rt_mutex_print_deadlock(&waiter);
2353
2354 if (waiter.task)
2355 schedule_rt_mutex(lock);
2356
2357 spin_lock_irq(&lock->wait_lock);
2358
2359 current->flags |= saved_flags;
2360 set_current_state(state);
2361 }
2362
2363 set_current_state(TASK_RUNNING);
2364
2365 if (unlikely(waiter.task))
2366 remove_waiter(lock, &waiter, flags);
2367
2368 /*
2369 * try_to_take_rt_mutex() sets the waiter bit
2370 * unconditionally. We might have to fix that up.
2371 */
2372 fixup_rt_mutex_waiters(lock);
2373
2374 spin_unlock_irqrestore(&lock->wait_lock, flags);
2375
2376 /* Remove pending timer: */
2377 if (unlikely(timeout))
2378 hrtimer_cancel(&timeout->timer);
2379
2380 /*
2381 * Readjust priority, when we did not get the lock. We might
2382 * have been the pending owner and boosted. Since we did not
2383 * take the lock, the PI boost has to go.
2384 */
2385 if (unlikely(ret))
2386 rt_mutex_adjust_prio(current);
2387
2388 /* Must we reaquire the BKL? */
2389 if (unlikely(saved_lock_depth >= 0))
2390 rt_reacquire_bkl(saved_lock_depth);
2391
2392 debug_rt_mutex_free_waiter(&waiter);
2393
2394 return ret;
2395 }
2396
2397 /*
2398 * Slow path try-lock function:
2399 */
2400 static inline int
2401 rt_mutex_slowtrylock(struct rt_mutex *lock)
2402 {
2403 unsigned long flags;
2404 int ret = 0;
2405
2406 spin_lock_irqsave(&lock->wait_lock, flags);
2407
2408 if (likely(rt_mutex_owner(lock) != current)) {
2409
2410 init_lists(lock);
2411
2412 ret = try_to_take_rt_mutex(lock);
2413 /*
2414 * try_to_take_rt_mutex() sets the lock waiters
2415 * bit unconditionally. Clean this up.
2416 */
2417 fixup_rt_mutex_waiters(lock);
2418 }
2419
2420 spin_unlock_irqrestore(&lock->wait_lock, flags);
2421
2422 return ret;
2423 }
2424
2425 /*
2426 * Slow path to release a rt-mutex:
2427 */
2428 static void __sched
2429 rt_mutex_slowunlock(struct rt_mutex *lock)
2430 {
2431 unsigned long flags;
2432
2433 spin_lock_irqsave(&lock->wait_lock, flags);
2434
2435 debug_rt_mutex_unlock(lock);
2436
2437 rt_mutex_deadlock_account_unlock(current);
2438
2439 if (!rt_mutex_has_waiters(lock)) {
2440 lock->owner = NULL;
2441 spin_unlock_irqrestore(&lock->wait_lock, flags);
2442 return;
2443 }
2444
2445 wakeup_next_waiter(lock, 0);
2446
2447 spin_unlock_irqrestore(&lock->wait_lock, flags);
2448
2449 /* Undo pi boosting if necessary: */
2450 rt_mutex_adjust_prio(current);
2451 }
2452
2453 /*
2454 * debug aware fast / slowpath lock,trylock,unlock
2455 *
2456 * The atomic acquire/release ops are compiled away, when either the
2457 * architecture does not support cmpxchg or when debugging is enabled.
2458 */
2459 static inline int
2460 rt_mutex_fastlock(struct rt_mutex *lock, int state,
2461 int detect_deadlock,
2462 int (*slowfn)(struct rt_mutex *lock, int state,
2463 struct hrtimer_sleeper *timeout,
2464 int detect_deadlock))
2465 {
2466 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
2467 rt_mutex_deadlock_account_lock(lock, current);
2468 return 0;
2469 } else
2470 return slowfn(lock, state, NULL, detect_deadlock);
2471 }
2472
2473 static inline int
2474 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
2475 struct hrtimer_sleeper *timeout, int detect_deadlock,
2476 int (*slowfn)(struct rt_mutex *lock, int state,
2477 struct hrtimer_sleeper *timeout,
2478 int detect_deadlock))
2479 {
2480 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
2481 rt_mutex_deadlock_account_lock(lock, current);
2482 return 0;
2483 } else
2484 return slowfn(lock, state, timeout, detect_deadlock);
2485 }
2486
2487 static inline int
2488 rt_mutex_fasttrylock(struct rt_mutex *lock,
2489 int (*slowfn)(struct rt_mutex *lock))
2490 {
2491 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
2492 rt_mutex_deadlock_account_lock(lock, current);
2493 return 1;
2494 }
2495 return slowfn(lock);
2496 }
2497
2498 static inline void
2499 rt_mutex_fastunlock(struct rt_mutex *lock,
2500 void (*slowfn)(struct rt_mutex *lock))
2501 {
2502 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
2503 rt_mutex_deadlock_account_unlock(current);
2504 else
2505 slowfn(lock);
2506 }
2507
2508 /**
2509 * rt_mutex_lock_killable - lock a rt_mutex killable
2510 *
2511 * @lock: the rt_mutex to be locked
2512 * @detect_deadlock: deadlock detection on/off
2513 *
2514 * Returns:
2515 * 0 on success
2516 * -EINTR when interrupted by a signal
2517 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
2518 */
2519 int __sched rt_mutex_lock_killable(struct rt_mutex *lock,
2520 int detect_deadlock)
2521 {
2522 might_sleep();
2523
2524 return rt_mutex_fastlock(lock, TASK_KILLABLE,
2525 detect_deadlock, rt_mutex_slowlock);
2526 }
2527 EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
2528
2529 /**
2530 * rt_mutex_lock - lock a rt_mutex
2531 *
2532 * @lock: the rt_mutex to be locked
2533 */
2534 void __sched rt_mutex_lock(struct rt_mutex *lock)
2535 {
2536 might_sleep();
2537
2538 rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
2539 }
2540 EXPORT_SYMBOL_GPL(rt_mutex_lock);
2541
2542 /**
2543 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
2544 *
2545 * @lock: the rt_mutex to be locked
2546 * @detect_deadlock: deadlock detection on/off
2547 *
2548 * Returns:
2549 * 0 on success
2550 * -EINTR when interrupted by a signal
2551 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
2552 */
2553 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
2554 int detect_deadlock)
2555 {
2556 might_sleep();
2557
2558 return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
2559 detect_deadlock, rt_mutex_slowlock);
2560 }
2561 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
2562
2563 /**
2564 * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
2565 * the timeout structure is provided
2566 * by the caller
2567 *
2568 * @lock: the rt_mutex to be locked
2569 * @timeout: timeout structure or NULL (no timeout)
2570 * @detect_deadlock: deadlock detection on/off
2571 *
2572 * Returns:
2573 * 0 on success
2574 * -EINTR when interrupted by a signal
2575 * -ETIMEOUT when the timeout expired
2576 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
2577 */
2578 int
2579 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
2580 int detect_deadlock)
2581 {
2582 might_sleep();
2583
2584 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
2585 detect_deadlock, rt_mutex_slowlock);
2586 }
2587 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
2588
2589 /**
2590 * rt_mutex_trylock - try to lock a rt_mutex
2591 *
2592 * @lock: the rt_mutex to be locked
2593 *
2594 * Returns 1 on success and 0 on contention
2595 */
2596 int __sched rt_mutex_trylock(struct rt_mutex *lock)
2597 {
2598 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
2599 }
2600 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
2601
2602 /**
2603 * rt_mutex_unlock - unlock a rt_mutex
2604 *
2605 * @lock: the rt_mutex to be unlocked
2606 */
2607 void __sched rt_mutex_unlock(struct rt_mutex *lock)
2608 {
2609 rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
2610 }
2611 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
2612
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called; a WARN_ON fires if it is. The magic field
 * is only cleared when CONFIG_DEBUG_RT_MUTEXES is set.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	/* Destroying a mutex that is still locked is a caller bug. */
	WARN_ON(rt_mutex_is_locked(lock));

#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
2630
2631 /**
2632 * __rt_mutex_init - initialize the rt lock
2633 *
2634 * @lock: the rt lock to be initialized
2635 *
2636 * Initialize the rt lock to unlocked state.
2637 *
2638 * Initializing of a locked rt lock is not allowed
2639 */
2640 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
2641 {
2642 lock->owner = NULL;
2643 spin_lock_init(&lock->wait_lock);
2644 plist_head_init(&lock->wait_list, &lock->wait_lock);
2645
2646 debug_rt_mutex_init(lock, name);
2647 }
2648 EXPORT_SYMBOL_GPL(__rt_mutex_init);
2649
2650 /**
2651 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
2652 * proxy owner
2653 *
2654 * @lock: the rt_mutex to be locked
2655 * @proxy_owner:the task to set as owner
2656 *
2657 * No locking. Caller has to do serializing itself
2658 * Special API call for PI-futex support
2659 */
2660 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
2661 struct task_struct *proxy_owner)
2662 {
2663 __rt_mutex_init(lock, NULL);
2664 debug_rt_mutex_proxy_lock(lock, proxy_owner);
2665 rt_mutex_set_owner(lock, proxy_owner, 0);
2666 rt_mutex_deadlock_account_lock(lock, proxy_owner);
2667 }
2668
2669 /**
2670 * rt_mutex_proxy_unlock - release a lock on behalf of owner
2671 *
2672 * @lock: the rt_mutex to be locked
2673 *
2674 * No locking. Caller has to do serializing itself
2675 * Special API call for PI-futex support
2676 */
2677 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
2678 struct task_struct *proxy_owner)
2679 {
2680 debug_rt_mutex_proxy_unlock(lock);
2681 rt_mutex_set_owner(lock, NULL, 0);
2682 rt_mutex_deadlock_account_unlock(proxy_owner);
2683 }
2684
2685 /**
2686 * rt_mutex_next_owner - return the next owner of the lock
2687 *
2688 * @lock: the rt lock query
2689 *
2690 * Returns the next owner of the lock or NULL
2691 *
2692 * Caller has to serialize against other accessors to the lock
2693 * itself.
2694 *
2695 * Special API call for PI-futex support
2696 */
2697 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
2698 {
2699 if (!rt_mutex_has_waiters(lock))
2700 return NULL;
2701
2702 return rt_mutex_top_waiter(lock)->task;
2703 }
2704
/* This page was automatically generated by the LXR engine. */