Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * net/sched/estimator.c        Simple rate estimator.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  */
 11 
 12 #include <asm/uaccess.h>
 13 #include <asm/system.h>
 14 #include <linux/bitops.h>
 15 #include <linux/module.h>
 16 #include <linux/types.h>
 17 #include <linux/kernel.h>
 18 #include <linux/jiffies.h>
 19 #include <linux/string.h>
 20 #include <linux/mm.h>
 21 #include <linux/socket.h>
 22 #include <linux/sockios.h>
 23 #include <linux/in.h>
 24 #include <linux/errno.h>
 25 #include <linux/interrupt.h>
 26 #include <linux/netdevice.h>
 27 #include <linux/skbuff.h>
 28 #include <linux/rtnetlink.h>
 29 #include <linux/init.h>
 30 #include <net/sock.h>
 31 #include <net/pkt_sched.h>
 32 
 33 /*
 34    This code is NOT intended to be used for statistics collection,
 35    its purpose is to provide a base for statistical multiplexing
 36    for controlled load service.
 37    If you need only statistics, run a user level daemon which
 38    periodically reads byte counters.
 39 
 40    Unfortunately, rate estimation is not a very easy task.
 41    F.e. I did not find a simple way to estimate the current peak rate
 42    and even failed to formulate the problem 8)8)
 43 
 44    So I preferred not to built an estimator into the scheduler,
 45    but run this task separately.
 46    Ideally, it should be kernel thread(s), but for now it runs
 47    from timers, which puts apparent top bounds on the number of rated
 48    flows, has minimal overhead on small, but is enough
 49    to handle controlled load service, sets of aggregates.
 50 
 51    We measure rate over A=(1<<interval) seconds and evaluate EWMA:
 52 
 53    avrate = avrate*(1-W) + rate*W
 54 
 55    where W is chosen as negative power of 2: W = 2^(-ewma_log)
 56 
 57    The resulting time constant is:
 58 
 59    T = A/(-ln(1-W))
 60 
 61 
 62    NOTES.
 63 
 64    * The stored value for avbps is scaled by 2^5, so that maximal
 65      rate is ~1Gbit, avpps is scaled by 2^10.
 66 
 67    * Minimal interval is HZ/4=250msec (it is the greatest common divisor
 68      for HZ=100 and HZ=1024 8)), maximal interval
 69      is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
 70      are too expensive, longer ones can be implemented
 71      at user level painlessly.
 72  */
 73 
 74 #define EST_MAX_INTERVAL        5
 75 
 76 struct qdisc_estimator
 77 {
 78         struct qdisc_estimator  *next;
 79         struct tc_stats         *stats;
 80         spinlock_t              *stats_lock;
 81         unsigned                interval;
 82         int                     ewma_log;
 83         u64                     last_bytes;
 84         u32                     last_packets;
 85         u32                     avpps;
 86         u32                     avbps;
 87 };
 88 
 89 struct qdisc_estimator_head
 90 {
 91         struct timer_list       timer;
 92         struct qdisc_estimator  *list;
 93 };
 94 
 95 static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
 96 
 97 /* Estimator array lock */
 98 static DEFINE_RWLOCK(est_lock);
 99 
100 static void est_timer(unsigned long arg)
101 {
102         int idx = (int)arg;
103         struct qdisc_estimator *e;
104 
105         read_lock(&est_lock);
106         for (e = elist[idx].list; e; e = e->next) {
107                 struct tc_stats *st = e->stats;
108                 u64 nbytes;
109                 u32 npackets;
110                 u32 rate;
111 
112                 spin_lock(e->stats_lock);
113                 nbytes = st->bytes;
114                 npackets = st->packets;
115                 rate = (nbytes - e->last_bytes)<<(7 - idx);
116                 e->last_bytes = nbytes;
117                 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
118                 st->bps = (e->avbps+0xF)>>5;
119 
120                 rate = (npackets - e->last_packets)<<(12 - idx);
121                 e->last_packets = npackets;
122                 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
123                 e->stats->pps = (e->avpps+0x1FF)>>10;
124                 spin_unlock(e->stats_lock);
125         }
126 
127         mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
128         read_unlock(&est_lock);
129 }
130 
131 int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
132 {
133         struct qdisc_estimator *est;
134         struct tc_estimator *parm = RTA_DATA(opt);
135 
136         if (RTA_PAYLOAD(opt) < sizeof(*parm))
137                 return -EINVAL;
138 
139         if (parm->interval < -2 || parm->interval > 3)
140                 return -EINVAL;
141 
142         est = kmalloc(sizeof(*est), GFP_KERNEL);
143         if (est == NULL)
144                 return -ENOBUFS;
145 
146         memset(est, 0, sizeof(*est));
147         est->interval = parm->interval + 2;
148         est->stats = stats;
149         est->stats_lock = stats_lock;
150         est->ewma_log = parm->ewma_log;
151         est->last_bytes = stats->bytes;
152         est->avbps = stats->bps<<5;
153         est->last_packets = stats->packets;
154         est->avpps = stats->pps<<10;
155 
156         est->next = elist[est->interval].list;
157         if (est->next == NULL) {
158                 init_timer(&elist[est->interval].timer);
159                 elist[est->interval].timer.data = est->interval;
160                 elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
161                 elist[est->interval].timer.function = est_timer;
162                 add_timer(&elist[est->interval].timer);
163         }
164         write_lock_bh(&est_lock);
165         elist[est->interval].list = est;
166         write_unlock_bh(&est_lock);
167         return 0;
168 }
169 
170 void qdisc_kill_estimator(struct tc_stats *stats)
171 {
172         int idx;
173         struct qdisc_estimator *est, **pest;
174 
175         for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
176                 int killed = 0;
177                 pest = &elist[idx].list;
178                 while ((est=*pest) != NULL) {
179                         if (est->stats != stats) {
180                                 pest = &est->next;
181                                 continue;
182                         }
183 
184                         write_lock_bh(&est_lock);
185                         *pest = est->next;
186                         write_unlock_bh(&est_lock);
187 
188                         kfree(est);
189                         killed++;
190                 }
191                 if (killed && elist[idx].list == NULL)
192                         del_timer(&elist[idx].timer);
193         }
194 }
195 
196 EXPORT_SYMBOL(qdisc_kill_estimator);
197 EXPORT_SYMBOL(qdisc_new_estimator);
198 
  This page was automatically generated by the LXR engine.