Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * Read-Copy Update preempt priority boosting
  3  *
  4  * This program is free software; you can redistribute it and/or modify
  5  * it under the terms of the GNU General Public License as published by
  6  * the Free Software Foundation; either version 2 of the License, or
  7  * (at your option) any later version.
  8  *
  9  * This program is distributed in the hope that it will be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12  * GNU General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU General Public License
 15  * along with this program; if not, write to the Free Software
 16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17  *
 18  * Copyright Red Hat Inc, 2007
 19  *
 20  * Authors: Steven Rostedt <srostedt@redhat.com>
 21  *
 22  * Based on the original work by Paul McKenney <paulmck@us.ibm.com>.
 23  *
 24  */
 25 #include <linux/sched.h>
 26 #include <linux/list.h>
 27 #include <linux/spinlock.h>
 28 #include <linux/debugfs.h>
 29 #include <linux/module.h>
 30 #include <linux/syscalls.h>
 31 #include <linux/kthread.h>
 32 
 33 DEFINE_RAW_SPINLOCK(rcu_boost_wake_lock);
 34 static int rcu_boost_prio = MAX_PRIO;   /* Prio to set preempted RCU readers */
 35 static long rcu_boost_counter;          /* used to keep track of who boosted */
 36 static int rcu_preempt_thread_secs = 3; /* Seconds between waking rcupreemptd thread */
 37 
 38 struct rcu_boost_dat {
 39         raw_spinlock_t rbs_lock;        /* Sync changes to this struct */
 40         int rbs_prio;                   /* CPU copy of rcu_boost_prio  */
 41         struct list_head rbs_toboost;   /* Preempted RCU readers       */
 42         struct list_head rbs_boosted;   /* RCU readers that have been boosted */
 43 #ifdef CONFIG_RCU_TRACE
 44         /* The rest are for statistics */
 45         unsigned long rbs_stat_task_boost_called;
 46         unsigned long rbs_stat_task_boosted;
 47         unsigned long rbs_stat_boost_called;
 48         unsigned long rbs_stat_try_boost;
 49         unsigned long rbs_stat_boosted;
 50         unsigned long rbs_stat_unboost_called;
 51         unsigned long rbs_stat_unboosted;
 52         unsigned long rbs_stat_try_boost_readers;
 53         unsigned long rbs_stat_boost_readers;
 54         unsigned long rbs_stat_try_unboost_readers;
 55         unsigned long rbs_stat_unboost_readers;
 56         unsigned long rbs_stat_over_taken;
 57 #endif /* CONFIG_RCU_TRACE */
 58 };
 59 
 60 static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
 61 #define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
 62 
 63 #ifdef CONFIG_RCU_TRACE
 64 
 65 #define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
 66 static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
 67 
 68 static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
 69                                 size_t count, loff_t *ppos)
 70 {
 71         static DEFINE_MUTEX(mutex);
 72         int cnt = 0;
 73         int cpu;
 74         struct rcu_boost_dat *rbd;
 75         ssize_t bcount;
 76         unsigned long task_boost_called = 0;
 77         unsigned long task_boosted = 0;
 78         unsigned long boost_called = 0;
 79         unsigned long try_boost = 0;
 80         unsigned long boosted = 0;
 81         unsigned long unboost_called = 0;
 82         unsigned long unboosted = 0;
 83         unsigned long try_boost_readers = 0;
 84         unsigned long boost_readers = 0;
 85         unsigned long try_unboost_readers = 0;
 86         unsigned long unboost_readers = 0;
 87         unsigned long over_taken = 0;
 88 
 89         mutex_lock(&mutex);
 90 
 91         for_each_online_cpu(cpu) {
 92                 rbd = &per_cpu(rcu_boost_data, cpu);
 93 
 94                 task_boost_called += rbd->rbs_stat_task_boost_called;
 95                 task_boosted += rbd->rbs_stat_task_boosted;
 96                 boost_called += rbd->rbs_stat_boost_called;
 97                 try_boost += rbd->rbs_stat_try_boost;
 98                 boosted += rbd->rbs_stat_boosted;
 99                 unboost_called += rbd->rbs_stat_unboost_called;
100                 unboosted += rbd->rbs_stat_unboosted;
101                 try_boost_readers += rbd->rbs_stat_try_boost_readers;
102                 boost_readers += rbd->rbs_stat_boost_readers;
103                 try_unboost_readers += rbd->rbs_stat_try_boost_readers;
104                 unboost_readers += rbd->rbs_stat_boost_readers;
105                 over_taken += rbd->rbs_stat_over_taken;
106         }
107 
108         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
109                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
110                         "task_boost_called = %ld\n",
111                         task_boost_called);
112         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
113                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
114                         "task_boosted = %ld\n",
115                         task_boosted);
116         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
117                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
118                         "boost_called = %ld\n",
119                         boost_called);
120         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
121                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
122                         "try_boost = %ld\n",
123                         try_boost);
124         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
125                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
126                         "boosted = %ld\n",
127                         boosted);
128         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
129                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
130                         "unboost_called = %ld\n",
131                         unboost_called);
132         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
133                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
134                         "unboosted = %ld\n",
135                         unboosted);
136         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
137                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
138                         "try_boost_readers = %ld\n",
139                         try_boost_readers);
140         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
141                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
142                         "boost_readers = %ld\n",
143                         boost_readers);
144         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
145                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
146                         "try_unboost_readers = %ld\n",
147                         try_unboost_readers);
148         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
149                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
150                         "unboost_readers = %ld\n",
151                         unboost_readers);
152         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
153                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
154                         "over_taken = %ld\n",
155                         over_taken);
156         cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
157                         RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
158                         "rcu_boost_prio = %d\n",
159                         rcu_boost_prio);
160         bcount = simple_read_from_buffer(buffer, count, ppos,
161                         rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
162         mutex_unlock(&mutex);
163 
164         return bcount;
165 }
166 
167 static struct file_operations rcuboost_fops = {
168         .read = rcuboost_read,
169 };
170 
171 static struct dentry  *rcuboostdir;
172 int rcu_trace_boost_create(struct dentry *rcudir)
173 {
174         rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
175                                           NULL, &rcuboost_fops);
176         if (!rcuboostdir)
177                 return 0;
178 
179         return 1;
180 }
181 EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
182 
183 void rcu_trace_boost_destroy(void)
184 {
185         if (rcuboostdir)
186                 debugfs_remove(rcuboostdir);
187         rcuboostdir = NULL;
188 }
189 EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
190 
191 #define RCU_BOOST_TRACE_FUNC_DECL(type)                       \
192         static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd)   \
193         {                                                               \
194                 rbd->rbs_stat_##type++;                                 \
195         }
196 RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
197 RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
198 RCU_BOOST_TRACE_FUNC_DECL(boost_called)
199 RCU_BOOST_TRACE_FUNC_DECL(try_boost)
200 RCU_BOOST_TRACE_FUNC_DECL(boosted)
201 RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
202 RCU_BOOST_TRACE_FUNC_DECL(unboosted)
203 RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
204 RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
205 RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
206 RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
207 RCU_BOOST_TRACE_FUNC_DECL(over_taken)
208 #else /* CONFIG_RCU_TRACE */
/*
 * Without CONFIG_RCU_TRACE the rcu_trace_boost_<stat>() helpers, which
 * RCU_BOOST_TRACE_FUNC_DECL generates in the tracing build, become no-ops.
 * The argument is not evaluated.
 */
# define rcu_trace_boost_task_boost_called(rbd)         do { } while (0)
# define rcu_trace_boost_task_boosted(rbd)              do { } while (0)
# define rcu_trace_boost_boost_called(rbd)              do { } while (0)
# define rcu_trace_boost_try_boost(rbd)                 do { } while (0)
# define rcu_trace_boost_boosted(rbd)                   do { } while (0)
# define rcu_trace_boost_unboost_called(rbd)            do { } while (0)
# define rcu_trace_boost_unboosted(rbd)                 do { } while (0)
# define rcu_trace_boost_try_boost_readers(rbd)         do { } while (0)
# define rcu_trace_boost_boost_readers(rbd)             do { } while (0)
# define rcu_trace_boost_try_unboost_readers(rbd)       do { } while (0)
# define rcu_trace_boost_unboost_readers(rbd)           do { } while (0)
# define rcu_trace_boost_over_taken(rbd)                do { } while (0)
222 #endif /* CONFIG_RCU_TRACE */
223 
224 static inline int rcu_is_boosted(struct task_struct *task)
225 {
226         return !list_empty(&task->rcub_entry);
227 }
228 
229 /*
230  * Helper function to boost a task's prio.
231  */
232 static void rcu_boost_task(struct task_struct *task)
233 {
234         WARN_ON(!irqs_disabled());
235         WARN_ON_SMP(!spin_is_locked(&task->pi_lock));
236 
237         rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
238 
239         if (task->rcu_prio < task->prio) {
240                 rcu_trace_boost_task_boosted(RCU_BOOST_ME);
241                 task_setprio(task, task->rcu_prio);
242         }
243 }
244 
245 /**
246  * __rcu_preepmt_boost - Called by sleeping RCU readers.
247  *
248  * When the RCU read-side critical section is preempted
249  * (or schedules out due to RT mutex)
250  * it places itself onto a list to notify that it is sleeping
251  * while holding a RCU read lock. If there is already a
252  * synchronize_rcu happening, then it will increase its
253  * priority (if necessary).
254  */
255 void __rcu_preempt_boost(void)
256 {
257         struct task_struct *curr = current;
258         struct rcu_boost_dat *rbd;
259         int prio;
260         unsigned long flags;
261 
262         WARN_ON(!current->rcu_read_lock_nesting);
263 
264         rcu_trace_boost_boost_called(RCU_BOOST_ME);
265 
266         /* check to see if we are already boosted */
267         if (unlikely(rcu_is_boosted(curr)))
268                 return;
269 
270         /*
271          * To keep us from preempting between grabing
272          * the rbd and locking it, we use local_irq_save
273          */
274         local_irq_save(flags);
275         rbd = &__get_cpu_var(rcu_boost_data);
276         spin_lock(&rbd->rbs_lock);
277 
278         spin_lock(&curr->pi_lock);
279 
280         curr->rcub_rbdp = rbd;
281 
282         rcu_trace_boost_try_boost(rbd);
283 
284         prio = rt_mutex_getprio(curr);
285 
286         if (list_empty(&curr->rcub_entry))
287                 list_add_tail(&curr->rcub_entry, &rbd->rbs_toboost);
288         if (prio <= rbd->rbs_prio)
289                 goto out;
290 
291         rcu_trace_boost_boosted(curr->rcub_rbdp);
292 
293         set_rcu_prio(curr, rbd->rbs_prio);
294         rcu_boost_task(curr);
295 
296  out:
297         spin_unlock(&curr->pi_lock);
298         spin_unlock_irqrestore(&rbd->rbs_lock, flags);
299 }
300 
301 /**
302  * __rcu_preempt_unboost - called when releasing the RCU read lock
303  *
304  * When releasing the RCU read lock, a check is made to see if
305  * the task was preempted. If it was, it removes itself from the
306  * RCU data lists and if necessary, sets its priority back to
307  * normal.
308  */
309 void __rcu_preempt_unboost(void)
310 {
311         struct task_struct *curr = current;
312         struct rcu_boost_dat *rbd;
313         int prio;
314         unsigned long flags;
315 
316         rcu_trace_boost_unboost_called(RCU_BOOST_ME);
317 
318         /* if not boosted, then ignore */
319         if (likely(!rcu_is_boosted(curr)))
320                 return;
321 
322         /*
323          * Need to be very careful with NMIs.
324          * If we take the lock and an NMI comes in
325          * and it may try to unboost us if curr->rcub_rbdp
326          * is still set. So we zero it before grabbing the lock.
327          * But this also means that we might be boosted again
328          * so the boosting code needs to be aware of this.
329          */
330         rbd = curr->rcub_rbdp;
331         curr->rcub_rbdp = NULL;
332 
333         /*
334          * Now an NMI might have came in after we grab
335          * the below lock. This check makes sure that
336          * the NMI doesn't try grabbing the lock
337          * while we already have it.
338          */
339         if (unlikely(!rbd))
340                 return;
341 
342         spin_lock_irqsave(&rbd->rbs_lock, flags);
343         /*
344          * It is still possible that an NMI came in
345          * between the "is_boosted" check and setting
346          * the rcu_rbdp to NULL. This would mean that
347          * the NMI already dequeued us.
348          */
349         if (unlikely(!rcu_is_boosted(curr)))
350                 goto out;
351 
352         list_del_init(&curr->rcub_entry);
353 
354         rcu_trace_boost_unboosted(rbd);
355 
356         set_rcu_prio(curr, MAX_PRIO);
357 
358         spin_lock(&curr->pi_lock);
359         prio = rt_mutex_getprio(curr);
360         task_setprio(curr, prio);
361 
362         curr->rcub_rbdp = NULL;
363 
364         spin_unlock(&curr->pi_lock);
365  out:
366         spin_unlock_irqrestore(&rbd->rbs_lock, flags);
367 }
368 
369 /*
370  * For each rcu_boost_dat structure, update all the tasks that
371  * are on the lists to the priority of the caller of
372  * synchronize_rcu.
373  */
374 static int __rcu_boost_readers(struct rcu_boost_dat *rbd, int prio, unsigned long flags)
375 {
376         struct task_struct *curr = current;
377         struct task_struct *p;
378 
379         spin_lock(&rbd->rbs_lock);
380 
381         rbd->rbs_prio = prio;
382 
383         /*
384          * Move the already boosted readers onto the list and reboost
385          * them.
386          */
387         list_splice_init(&rbd->rbs_boosted,
388                          &rbd->rbs_toboost);
389 
390         while (!list_empty(&rbd->rbs_toboost)) {
391                 p = list_entry(rbd->rbs_toboost.next,
392                                struct task_struct, rcub_entry);
393                 list_move_tail(&p->rcub_entry,
394                                &rbd->rbs_boosted);
395                 set_rcu_prio(p, prio);
396                 spin_lock(&p->pi_lock);
397                 rcu_boost_task(p);
398                 spin_unlock(&p->pi_lock);
399 
400                 /*
401                  * Now we release the lock to allow for a higher
402                  * priority task to come in and boost the readers
403                  * even higher. Or simply to let a higher priority
404                  * task to run now.
405                  */
406                 spin_unlock(&rbd->rbs_lock);
407                 spin_unlock_irqrestore(&rcu_boost_wake_lock, flags);
408 
409                 cpu_relax();
410                 spin_lock_irqsave(&rcu_boost_wake_lock, flags);
411                 /*
412                  * Another task may have taken over.
413                  */
414                 if (curr->rcu_preempt_counter != rcu_boost_counter) {
415                         rcu_trace_boost_over_taken(rbd);
416                         return 1;
417                 }
418 
419                 spin_lock(&rbd->rbs_lock);
420         }
421 
422         spin_unlock(&rbd->rbs_lock);
423 
424         return 0;
425 }
426 
427 /**
428  * rcu_boost_readers - called by synchronize_rcu to boost sleeping RCU readers.
429  *
430  * This function iterates over all the per_cpu rcu_boost_data descriptors
431  * and boosts any sleeping (or slept) RCU readers.
432  */
433 void rcu_boost_readers(void)
434 {
435         struct task_struct *curr = current;
436         struct rcu_boost_dat *rbd;
437         unsigned long flags;
438         int prio;
439         int cpu;
440         int ret;
441 
442         spin_lock_irqsave(&rcu_boost_wake_lock, flags);
443 
444         prio = rt_mutex_getprio(curr);
445 
446         rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
447 
448         if (prio >= rcu_boost_prio) {
449                 /* already boosted */
450                 spin_unlock_irqrestore(&rcu_boost_wake_lock, flags);
451                 return;
452         }
453 
454         rcu_boost_prio = prio;
455 
456         rcu_trace_boost_boost_readers(RCU_BOOST_ME);
457 
458         /* Flag that we are the one to unboost */
459         curr->rcu_preempt_counter = ++rcu_boost_counter;
460 
461         for_each_online_cpu(cpu) {
462                 rbd = &per_cpu(rcu_boost_data, cpu);
463                 ret = __rcu_boost_readers(rbd, prio, flags);
464                 if (ret)
465                         break;
466         }
467 
468         spin_unlock_irqrestore(&rcu_boost_wake_lock, flags);
469 
470 }
471 
472 /**
473  * rcu_unboost_readers - set the boost level back to normal.
474  *
475  * This function DOES NOT change the priority of any RCU reader
476  * that was boosted. The RCU readers do that when they release
477  * the RCU lock. This function only sets the global
478  * rcu_boost_prio to MAX_PRIO so that new RCU readers that sleep
479  * do not increase their priority.
480  */
481 void rcu_unboost_readers(void)
482 {
483         struct rcu_boost_dat *rbd;
484         unsigned long flags;
485         int cpu;
486 
487         spin_lock_irqsave(&rcu_boost_wake_lock, flags);
488 
489         rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
490 
491         if (current->rcu_preempt_counter != rcu_boost_counter)
492                 goto out;
493 
494         rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
495 
496         /*
497          * We could also put in something that
498          * would allow other synchronize_rcu callers
499          * of lower priority that are still waiting
500          * to boost the prio.
501          */
502         rcu_boost_prio = MAX_PRIO;
503 
504         for_each_online_cpu(cpu) {
505                 rbd = &per_cpu(rcu_boost_data, cpu);
506 
507                 spin_lock(&rbd->rbs_lock);
508                 rbd->rbs_prio = rcu_boost_prio;
509                 spin_unlock(&rbd->rbs_lock);
510         }
511 
512  out:
513         spin_unlock_irqrestore(&rcu_boost_wake_lock, flags);
514 }
515 
516 /*
517  * The krcupreemptd wakes up every "rcu_preempt_thread_secs"
518  * seconds at the minimum priority of 1 to do a
519  * synchronize_rcu. This ensures that grace periods finish
520  * and that we do not starve the system. If there are RT
521  * tasks above priority 1 that are hogging the system and
522  * preventing release of memory, then its the fault of the
523  * system designer running RT tasks too aggressively and the
524  * system is flawed regardless.
525  */
526 static int krcupreemptd(void *data)
527 {
528         struct sched_param param = { .sched_priority = 1 };
529         int ret;
530         int prio;
531 
532         ret = sched_setscheduler(current, SCHED_FIFO, &param);
533         printk("krcupreemptd setsched %d\n", ret);
534         prio = current->prio;
535         printk("  prio = %d\n", prio);
536         set_current_state(TASK_INTERRUPTIBLE);
537 
538         while (!kthread_should_stop()) {
539                 schedule_timeout(rcu_preempt_thread_secs * HZ);
540 
541                 __set_current_state(TASK_RUNNING);
542                 if (prio != current->prio) {
543                         prio = current->prio;
544                         printk("krcupreemptd new prio is %d??\n",prio);
545                 }
546 
547                 synchronize_rcu();
548 
549                 set_current_state(TASK_INTERRUPTIBLE);
550         }
551         __set_current_state(TASK_RUNNING);
552         return 0;
553 }
554 
555 int __init rcu_preempt_boost_init(void)
556 {
557         struct rcu_boost_dat *rbd;
558         int cpu;
559 
560         for_each_possible_cpu(cpu) {
561                 rbd = &per_cpu(rcu_boost_data, cpu);
562 
563                 spin_lock_init(&rbd->rbs_lock);
564                 rbd->rbs_prio = MAX_PRIO;
565                 INIT_LIST_HEAD(&rbd->rbs_toboost);
566                 INIT_LIST_HEAD(&rbd->rbs_boosted);
567         }
568 
569         return 0;
570 }
571 
572 static int __init rcu_preempt_start_krcupreemptd(void)
573 {
574         struct task_struct *p;
575 
576         p = kthread_create(krcupreemptd, NULL,
577                            "krcupreemptd");
578 
579         if (IS_ERR(p)) {
580                 printk("krcupreemptd failed\n");
581                 return NOTIFY_BAD;
582         }
583         wake_up_process(p);
584 
585         return 0;
586 }
587 
588 __initcall(rcu_preempt_start_krcupreemptd);
589 
  This page was automatically generated by the LXR engine.