diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 7e803fc..3a6e831 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -12,6 +12,12 @@ config IOSCHED_NOOP that do their own scheduling and require only minimal assistance from the kernel. +config IOSCHED_RTQ + tristate "RT I/O scheduler" + default y + ---help--- + Real-time scheduler for use in prioritized I/O requests. + config IOSCHED_AS tristate "Anticipatory I/O scheduler" default y @@ -56,6 +62,9 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_RTQ + bool "RTQ" if IOSCHED_RTQ=y + config DEFAULT_NOOP bool "No-op" @@ -63,6 +72,7 @@ endchoice config DEFAULT_IOSCHED string + default "rtio" if DEFAULT_RTQ default "anticipatory" if DEFAULT_AS default "deadline" if DEFAULT_DEADLINE default "cfq" if DEFAULT_CFQ diff --git a/block/Makefile b/block/Makefile index 5a43c7d..1641e61 100644 --- a/block/Makefile +++ b/block/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_IOSCHED_RTQ) += rtq-iosched.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o diff --git a/block/rtq-iosched.c b/block/rtq-iosched.c new file mode 100644 index 0000000..8940767 --- /dev/null +++ b/block/rtq-iosched.c @@ -0,0 +1,1041 @@ +/* + * started with elevator noop, and made changes for real-time I/O + */ +#include +#include +#include +#include +#include +#include +#include +#include /* ktime conversions (e.g. to ns) */ +#include /* linked lists */ +#include /* tasklets */ +#include /* timing */ +#include +#include + +#include + +#define RTQ_MSG_LVL KERN_DEBUG +/* value from 0-6(0 - least messages, 6 - most messages) */ +/* 6 - reserved for function naming */ +#define RTQ_MSG_DEBUG_LVL 1 + +#define RTQ_MSG(level, fmt, args...) do { \ + if (level <= RTQ_MSG_DEBUG_LVL) { \ + printk(RTQ_MSG_LVL "rtq_msg * " fmt "\n", ## args ); \ + } \ +} while (0) + +#define RTQ_FUNC_LVL 6 +#define RTQ_FUNC() do { \ + RTQ_MSG(RTQ_FUNC_LVL, "%s", __func__); \ +} while (0) + +/* + * Function Prototypes + */ + +/* + * tunables + */ +/* if requests exists min to keep disk efficient */ +static const int rtq_min_driver_busy = 4; +static const int rtq_max_driver_busy = 1; +static const int rtq_wait_rt = 0; + +/* + * data for device(s) that use the real-time I/O scheduler + * remember that there will be one rtq_data structure for + * each device that uses it + */ +struct rtq_data { + struct request_queue *req_queue; + struct list_head rtqq_list; /* list of priority rtqq's */ + /* + * number of requests that have been issued to the driver + * these are out of our control + */ + atomic_t driver_busy; + unsigned int nr_held_requests; + /* optimized minimal outstanding request to keep disk efficient */ + int min_driver_busy; + int max_driver_busy; + + int wait_rt; + int rt_busy; + /* + * Requests timing information. + * NOTE: For now all these times are in nanoseconds. + * This makes it easier since most of the time functions are + * int nanoseconds. 
+ */ + s64 pred_time; + u64 recent_request_rtn; + /* + * queue draining information + */ + u64 response_time; /* in nanoseconds */ + struct rb_root deadline_root; + int reserve_rt; /* number of slots to reserve for rt requests */ + /* + * used to remove warning for sys entry + * This allows a location to write to if someone tries + * to write to certain sysfs file locations + */ + int sys_temp_variable; +}; + +/** + * rtqq + */ +struct rtq_queue { + struct list_head rtqq_list; /* list of priority queues */ + /* parent rtq_data */ + struct rtq_data *rtqd; + struct rb_root sort_list; + struct list_head fifo; + + /* io priority of this queue */ + unsigned short ioprio_class; + /* the number associated with this queue */ + /* NOTE: currently this is just the ioprio_class */ + unsigned short ioprio_queue; +}; + +/** + * rq_data + */ +struct rtq_rq_data { + u64 out_time_ns; + u64 pred_time; + u64 deadline_ns; + struct rb_node deadline_node; +}; + +/** + * Helpers to get common structures given a structure + * + * NOTE: can think of function names as struct a -> struct b where + * the function name would be a_b(a) + */ +static inline struct rtq_rq_data * +rq_rq_data(struct request *rq) +{ + return rq->elevator_private; +} + +static inline struct rtq_queue * +rq_rtqq(struct request *rq) +{ + return rq->elevator_private2; +} + +static inline struct rtq_data * +rq_rtqd(struct request *rq) +{ + return rq->q->elevator->elevator_data; +} + +static inline struct rtq_data * +rtqq_rtqd(struct rtq_queue *rtqq) +{ + return rtqq->rtqd; +} + +static inline struct rtq_data * +q_rtqd(struct request_queue *q) +{ + return q->elevator->elevator_data; +} + +static inline struct request_queue * +rq_q(struct request *rq) +{ + return rq->q; +} + + + +/** + * rtq_find_queue + * + * @rtqd: elevator data in which to search for the priority queue + * @prio_queue: number of the priority queue + * @return: rtqq* mapped to @prio_queue, else NULL + * + * Locks: + * queue lock must be held when entering here since we cannot have + * a rtqq removed while we are iterating over the rtqq_list + * + * Description: + * iterate over all queues to find existing prio_queue + * NOTE: for now the prio_queue is just the IOPRIO_PRIO_CLASS + * + */ +static struct rtq_queue * +rtq_find_queue(struct rtq_data *rtqd, int prio_queue) +{ + struct rtq_queue *rtqq; + struct list_head *list = &rtqd->rtqq_list; + + list_for_each_entry(rtqq, list, rtqq_list) { + if (prio_queue == rtqq->ioprio_class) { + return rtqq; + } + } + + return NULL; +} + +/** + * rtq_get_queue + * + * @rtqd: elevator data in which to get a priority queue + * @prio_queue: number of the priority queue + * @return: rtqq* mapped to @prio_queue + * NOTE: for now should ALWAYS return a queue (later if add pool + * it may not) + * + * NOTE: for now we will just have one queue for each priority class + * + * Locks: + * queue lock must be held when entering here since we cannot have + * a rtqq removed while we are iterating over the rtqq_list, also we will + * be adding to the list of queues + * + * queue lock will be locked when exiting + * + * Description: + * get a priority queue to hold requests. 
If one exists it will be + * returned, if not, a new one will be created + */ +static struct rtq_queue * +rtq_get_queue(struct rtq_data *rtqd, int prio_queue, gfp_t gfp_mask) +{ + struct rtq_queue *rtqq, *new_rtqq = NULL; + + rtqq = rtq_find_queue(rtqd, prio_queue); +retry: + if (!rtqq) { + if (new_rtqq) { + rtqq = new_rtqq; + new_rtqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + /* + * Inform the allocator of the fact that we will + * just repeat this allocation if it fails, to allow + * the allocator to do whatever it needs to attempt to + * free memory. + */ + spin_unlock_irq(rtqd->req_queue->queue_lock); + new_rtqq = kmalloc_node(sizeof(*rtqq), gfp_mask|__GFP_NOFAIL, rtqd->req_queue->node); + RTQ_MSG(3, "allocating new rtqq(%d)", prio_queue); + spin_lock_irq(rtqd->req_queue->queue_lock); + goto retry; + } else { + // TODO: implement this case where we will not wait for the allocation since we have a pool + // not a concern now since all allocations will be done during initialization + //rtqq = kmem_cache_alloc_node(rtq_pool, gfp_mask, rtqd->queue->node); + //if (!rtqq) + BUG_ON(1); + goto out; + } + + memset(rtqq, 0, sizeof(*rtqq)); + INIT_LIST_HEAD(&rtqq->fifo); + rtqq->rtqd = rtqd; + rtqq->ioprio_class = prio_queue; + rtqq->ioprio_queue = prio_queue; + list_add_tail(&rtqq->rtqq_list, &rtqd->rtqq_list); + + // TODO: do we need to handle the new queue case here? + // cfq_mark_cfqq_queue_new(cfqq); + // cfq_init_prio_data(cfqq); + } + +out: + WARN_ON((gfp_mask & __GFP_WAIT) && !rtqq); + return rtqq; +} + +/** + * rtq_get_queue_tsk + * + * @rtqd: elevator data in which to get a priority queue + * @tsk: process to retrieve a matching priority queue + * @return: rtqq* mapped to @tsk + * NOTE: for now should ALWAYS return a queue + * + * NOTE: for now we will just have one queue for each priority class + * + * Locks: + * + * Description: + * get a priority queue given a task + */ +static struct rtq_queue * +rtq_get_queue_tsk(struct rtq_data *rtqd, struct task_struct *tsk, gfp_t gfp_mask) +{ + struct io_context *ioc = NULL; + + ioc = get_io_context(gfp_mask, rtqd->req_queue->node); + + if (!ioc) { + RTQ_MSG(1, "no io context returned, this case is not handled"); + return NULL; + } + + return rtq_get_queue(rtqd, IOPRIO_PRIO_CLASS(ioc->ioprio), gfp_mask); +} + +static u64 +rtq_drain_time(int nr_requests) +{ + const u64 drain_times[] = { + 0, // 0 + 15, // 1 + 27, // 2 + 35, // 3 + 43, // 4 + 55, // 5 + 60, // 6 + 68, // 7 + 74, // 8 + 83, // 9 + 93, // 10 + 101, // 11 + 103, // 12 + 113, // 13 + 120, // 14 + 126, // 15 + 133, // 16 + 141, // 17 + 143, // 18 + 151, // 19 + 158, // 20 + }; + + static const int num_drain_times = sizeof(drain_times) / sizeof(*drain_times); + + if (nr_requests <= num_drain_times) { + return drain_times[nr_requests] * (u64)1E6; + } else { + return (u64)160E6; + } +} + +static u64 +rtq_earliest_deadline(struct rtq_data *rtqd) +{ + struct rtq_rq_data *rq_data; + struct rb_node *first_node = rb_first(&rtqd->deadline_root); + + if (!first_node) { + return 0; + } else { + if (!rtqd->response_time) { + RTQ_MSG(1, "first node exists, but response time is zero?"); + return 0; + } + rq_data = rb_entry(first_node, struct rtq_rq_data, deadline_node); + if (!rq_data->deadline_ns) { + RTQ_MSG(0, "zero response time inserted into rb-tree!!!"); + } + return rq_data->deadline_ns; + } +} + +/* + * Description: + * Get next queue for service. 
+ *
+ * NOTE:
+ * It may be the case that there are requests that need to be serviced, but due
+ * to constraints they may not be made available. An example of this is waiting
+ * for real-time requests to complete first.
+ *
+ * NOTE: IOPRIO_CLASS_NONE will be passed when the I/O priority is not
+ * explicitly set.
+ */
+static struct rtq_queue *
+rtq_select_queue(struct rtq_data *rtqd, int force)
+{
+	struct rtq_queue *rtqq;
+	int prio_class = IOPRIO_CLASS_RT;
+	int driver_busy = atomic_read(&rtqd->driver_busy);
+
+	if (force) {
+		RTQ_MSG(1, "elevator forced to send requests to device");
+	}
+
+	while (1) {
+		if (force) { /* force sending, most likely because we are switching I/O schedulers */
+			rtqq = rtq_find_queue(rtqd, prio_class);
+			if (rtqq && (!list_empty(&rtqq->fifo))) {
+				return rtqq;
+			}
+
+			if (rtqq && list_empty(&rtqq->fifo)) {
+				RTQ_MSG(5, "list empty for prio_class: %d", prio_class);
+			}
+		} else { /* normal operation */
+			/*
+			 * enforce the max queue depth: this rt dispatch is put
+			 * before the max-depth check below so rt requests can go
+			 * right away even if the max queue depth has been reached
+			 */
+			/*
+			 * NOTE: This will only send real-time requests right away.
+			 * Others will be stopped by the max check because the RT class
+			 * is handled before the other priorities. However, this forces
+			 * the max allowable to be enforced by this check. It cannot be separated.
+			 */
+			rtqq = rtq_find_queue(rtqd, prio_class);
+			if (rtqq && (!list_empty(&rtqq->fifo))) {
+				return rtqq;
+			}
+
+			/*
+			 * has the max allowable number of requests on the drive been reached?
+			 */
+			if (driver_busy >= rtqd->max_driver_busy && IOPRIO_CLASS_RT != prio_class) {
+				RTQ_MSG(5, "returning NULL, max_driver_busy reached: "
+					"%d", rtqd->max_driver_busy);
+				RTQ_MSG(5, "driver_busy: %d", driver_busy);
+				return NULL;
+			}
+
+			/*
+			 * This is the second check to determine if we should send a request
+			 * to the disk. If any real-time requests are outstanding then no further
+			 * requests will be sent to the disk until the real-time request
+			 * returns.
+			 *
+			 * This applies to non-real-time requests only, not to all requests.
+			 * That is, if a new real-time request wants to go to
+			 * the disk, send it immediately.
+ */ + if (rtqd->wait_rt && rtqd->rt_busy && IOPRIO_CLASS_RT != prio_class) { + RTQ_MSG(5, "waiting for rt request to complete"); + return NULL; + } + + /* + * check if any request is currently outstanding + * if the deadline is greater than the time to drain + * the queue then we should not send any further requests + */ + if (rtq_earliest_deadline(rtqd)) { /* are there any deadlines */ + u64 curr_ns = ktime_to_ns(ktime_get()); + u64 nearest_deadline = rtq_earliest_deadline(rtqd); + u64 drain_time; + u64 wc_deadline; + int drain_number; + + /* add one for this request which will add to the total drain time */ + drain_number = driver_busy + 1; + /* for multiple real-time requests */ + if (rtqd->reserve_rt) { + drain_number += rtqd->reserve_rt - rtqd->rt_busy; + } + drain_time = rtq_drain_time(drain_number); + wc_deadline = curr_ns + drain_time; + + if (wc_deadline >= nearest_deadline) { + // HACK: should check deadline of next request and + // any request on the disk + if (IOPRIO_CLASS_RT != prio_class) { + RTQ_MSG(4, "rt request deadline in jeopardy"); + RTQ_MSG(4, "driver_busy: %d", driver_busy); + /* NOTE: this is an absolute NOT relative deadline, therefore + * the number may be quite large */ + if (curr_ns > nearest_deadline) { + RTQ_MSG(1, "inconsistent state, deadline will be missed"); + RTQ_MSG(1, "current ns: %llu", curr_ns); + RTQ_MSG(1, "nearest deadline: %llu", nearest_deadline); + } else { + RTQ_MSG(5, "%llu nearest deadline(ns)", nearest_deadline-curr_ns); + } + return NULL; + } + } + } + } + + if (IOPRIO_CLASS_RT == prio_class) prio_class = IOPRIO_CLASS_BE; + else if (IOPRIO_CLASS_BE == prio_class) prio_class = IOPRIO_CLASS_NONE; + else if (IOPRIO_CLASS_NONE == prio_class) break; + } + + return NULL; +} + +/** + * + * Description: + * Put a timestamp on when the request left for the device. + * + */ +static void +rtq_timestamp_request(struct request *rq) +{ + struct rtq_rq_data *rq_data = rq_rq_data(rq); + + rq_data->out_time_ns = ktime_to_ns(ktime_get()); +} + +static void +rtq_alloc_rq_data(struct request *rq) +{ + struct rtq_rq_data *rq_data = NULL; + + RTQ_FUNC(); + RTQ_MSG(5, "enter rtq_alloc_rq_data()"); + + rq_data = kzalloc(sizeof(struct rtq_rq_data), GFP_ATOMIC); + if (!rq_data) { + BUG_ON(1); + RTQ_MSG(0, "rq allocation failed, this is NOT handled"); + } else { + rq->elevator_private = rq_data; + } + + RTQ_MSG(5, "exit rtq_alloc_rq_data()"); +} + +static void +rtq_dealloc_rq_data(struct request *rq) +{ + struct rtq_rq_data *rq_data = rq_rq_data(rq); + + RTQ_FUNC(); + + // TODO: make sure rq is not in deadline rb tree + kfree(rq_data); +} + +/** + * + * Description: + * Return the requested response time in nanoseconds. 
+ */ +static u64 +rtq_rq_response_time(struct request *rq, struct rtq_data *rtqd) +{ + return rtqd->response_time; +} + +/* + * rb tree functions + */ +static void +rtq_deadline_insert(struct rb_root *root, struct request *rq) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct rtq_rq_data *__rq_data; + struct rtq_rq_data *rq_data; + + rq_data = rq_rq_data(rq); + while (*p) { + parent = *p; + __rq_data = rb_entry(parent, struct rtq_rq_data, deadline_node); + + if (rq_data->deadline_ns < __rq_data->deadline_ns) + p = &(*p)->rb_left; + else if (rq_data->deadline_ns > __rq_data->deadline_ns) + p = &(*p)->rb_right; + else { + // HACK: just increase by one so we don't have multiple + // entries at one node + rq_data->deadline_ns++; + rtq_deadline_insert(root, rq); + return; + } + } + + rb_link_node(&rq_data->deadline_node, parent, p); + rb_insert_color(&rq_data->deadline_node, root); + return; +} + + +/** + * + * Description: + * Remove the request from the set of those with deadlines. + */ +static void +rtq_deadline_remove(struct request *rq) +{ + struct rtq_rq_data *rq_data; + struct rtq_data *rtqd = rq_rtqd(rq); + + rq_data = rq_rq_data(rq); + rb_erase(&rq_data->deadline_node, &rtqd->deadline_root); +} + +/** + * + * Description: + * Assign a deadline to the request and add it to the set of all requests + * with pending deadlines. + */ +static void +rtq_deadline_stamp(struct request *rq, struct rtq_data *rtqd) +{ + u64 resp_time = rtq_rq_response_time(rq, rtqd); + + if (resp_time) { + struct rtq_rq_data *rq_data = rq_rq_data(rq); + + rq_data->deadline_ns = ktime_to_ns(ktime_get()) + resp_time; + /* add to rb tree */ + rtq_deadline_insert(&rtqd->deadline_root, rq); + } +} + +/** + * Allocate rtq data structures associated with this request. + */ +static int +rtq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) +{ + struct rtq_data *rtqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct rtq_queue *rtqq; + unsigned long flags; + + RTQ_FUNC(); + + spin_lock_irqsave(q->queue_lock, flags); + rtqq = rtq_get_queue_tsk(rtqd, tsk, gfp_mask); + spin_unlock_irqrestore(q->queue_lock, flags); + + rq->elevator_private2 = rtqq; + + rtq_alloc_rq_data(rq); + + if (IOPRIO_CLASS_RT == rtqq->ioprio_class) { + /* TODO: is this the earliest point to stamp? */ + //RTQ_MSG(1, "stamping rt request"); + rtq_deadline_stamp(rq, rtqd); + } + + return 0; +} + +static void +rtq_merged_requests(struct request_queue *q, struct request *rq, struct request *next) +{ + RTQ_FUNC(); + RTQ_MSG(0, "ERROR!!!!!!!!!!!!!"); + RTQ_MSG(0, "request was merged, this should not happen"); + list_del_init(&next->queuelist); +} + +/** + * + * Description: + * Called by lower level layer (e.g. scsi) when a new request can be sent + * to the device. In this function we may hold back requests to lower levels + * to implement constraints (e.g. 
to reduce missed deadlines) + * + */ +static int +rtq_dispatch(struct request_queue *q, int force) +{ + struct rtq_data *rtqd = q_rtqd(q); + struct rtq_queue *rtqq; + RTQ_FUNC(); + + while (1) { + if (!(rtqq = rtq_select_queue(rtqd, force))) return 0; + + if (!list_empty(&rtqq->fifo)) { + struct request *rq; + + rq = list_entry(rtqq->fifo.next, struct request, queuelist); + WARN_ON(!rtqd->nr_held_requests); + if (rtqd->nr_held_requests) { + rtqd->nr_held_requests--; + //RTQ_MSG(1, "held: %d", rtqd->nr_held_requests); + } + list_del_init(&rq->queuelist); + elv_dispatch_sort(q, rq); + return 1; + } + } +} + +static void +rtq_add_request(struct request_queue *q, struct request *rq) +{ + struct rtq_queue *rtqq = rq_rtqq(rq); + struct rtq_data *rtqd = rtqq_rtqd(rtqq); + + RTQ_FUNC(); + + rtqd->nr_held_requests++; + //RTQ_MSG(1, "held: %d", rtqd->nr_held_requests); + list_add_tail(&rq->queuelist, &rtqq->fifo); + + RTQ_MSG(5, "driver_busy: %d", atomic_read(&rtqd->driver_busy)); + RTQ_MSG(5, "nr_held_requests: %d", rtqd->nr_held_requests); + + /* do not wait for unplug timer */ + /* + * This is important especially when servicing real-time requests. + * + * Note: maybe this should only be called if the request + * added was a real-time request. + * + * Problems may develop because of how the requests sent + * to the device are limited. The limit is not based wholly + * on the device capcity, therefore it may be desirable to + * send a request to the disk immediatley, even though no + * requests have completed(the normal service order). + */ + if (IOPRIO_CLASS_RT == rtqq->ioprio_class) { + blk_start_queueing(q); + } +} + +static int +rtq_queue_empty(struct request_queue *q) +{ + struct rtq_data *rtqd = q->elevator->elevator_data; + RTQ_FUNC(); + RTQ_MSG(5, "nr_held_requests: %d", rtqd->nr_held_requests); + + return !rtqd->nr_held_requests; +} + +static struct request * +rtq_former_request(struct request_queue *q, struct request *rq) +{ + RTQ_FUNC(); + + RTQ_MSG(0, "function may not be implemented correctly"); + return NULL; +} + +static struct request * +rtq_latter_request(struct request_queue *q, struct request *rq) +{ + RTQ_FUNC(); + + RTQ_MSG(0, "function may not be implemented correctly"); + return NULL; +} + +/** + * rtq_init_queue + * + * @q: request_queue in which to setup our private data + * @return: pointer to our private data + * + * Description: + * initialization function when we are picked as the elevator for + * a certain block device + */ +static void * +rtq_init_queue(struct request_queue *q) +{ + struct rtq_data *rtqd; + RTQ_FUNC(); + + rtqd = kmalloc_node(sizeof(*rtqd), GFP_KERNEL, q->node); + if (!rtqd) { + return NULL; + } + RTQ_MSG(5, "successfully allocated rtqd"); + memset(rtqd, 0, sizeof(*rtqd)); + rtqd->req_queue = q; + INIT_LIST_HEAD(&rtqd->rtqq_list); + rtqd->min_driver_busy = rtq_min_driver_busy; + rtqd->max_driver_busy = rtq_max_driver_busy; + rtqd->wait_rt = rtq_wait_rt; + rtqd->response_time = 75000000; + rtqd->reserve_rt = 0; + atomic_set(&rtqd->driver_busy, 0); + + rtqd->deadline_root = RB_ROOT; + /* + * NOTE: initialize some number of queue's now, may want to init + * more later + */ + RTQ_MSG(5, "attempting to initialize rtq queue(s)"); + spin_lock_irq(q->queue_lock); + rtq_get_queue(rtqd, IOPRIO_CLASS_NONE, GFP_KERNEL); + rtq_get_queue(rtqd, IOPRIO_CLASS_RT , GFP_KERNEL); + rtq_get_queue(rtqd, IOPRIO_CLASS_BE , GFP_KERNEL); + spin_unlock_irq(q->queue_lock); + + return rtqd; +} + +/** + * rtq_exit_queue + * + * @e: the elevator in which we are known + * + * 
Locks: + * take and releases the request_queue lock + * + * Description: + * cleanup anything that we allocated in rtq_init_queue + */ +static void +rtq_exit_queue(elevator_t *e) +{ + struct rtq_data *rtqd = e->elevator_data; + RTQ_FUNC(); + + kfree(rtqd); + + RTQ_MSG(5, "exiting %s", __func__); +} + +/** + * + * Description: + * Called when a request is sent to the device. + * + */ +static void +rtq_request_sent(struct request *rq) +{ + struct rtq_queue *rtqq = rq_rtqq(rq); + struct rtq_data *rtqd = rq_rtqd(rq); + + rtq_timestamp_request(rq); + + if (IOPRIO_CLASS_RT == rtqq->ioprio_class) { + rtqd->rt_busy++; + } + + atomic_inc(&rtqd->driver_busy); +} + +/** + * + * Description: + * Called when a completed request is returned from the device. + */ +static void +rtq_request_received(struct request *rq) +{ + struct rtq_queue *rtqq = rq_rtqq(rq); + struct rtq_data *rtqd = rq->q->elevator->elevator_data; + struct rtq_rq_data *rq_data = rq_rq_data(rq); + //struct request_queue *q = rq_q(rq); + + if (IOPRIO_CLASS_RT == rtqq->ioprio_class) { + WARN_ON(!rtqd->rt_busy); + rtqd->rt_busy--; + rtq_deadline_remove(rq); + } + + if (!rq_data) { + RTQ_MSG(0, "warning no request data, " + "it could not be allocated?"); + } + + rtq_dealloc_rq_data(rq); + + WARN_ON(!atomic_read(&rtqd->driver_busy)); + atomic_dec(&rtqd->driver_busy); + + //blk_start_queueing(q); +} + +static void +rtq_activate_request(struct request_queue *q, struct request *rq) +{ + RTQ_FUNC(); + + rtq_request_sent(rq); +} + +static void +rtq_deactivate_request(struct request_queue *q, struct request *rq) +{ + RTQ_FUNC(); + + rtq_request_received(rq); +} + +static void +rtq_completed_request(struct request_queue *q, struct request *rq) +{ + RTQ_FUNC(); + + rtq_request_received(rq); +} + + +/* + * sysfs parts below --> + */ +static ssize_t +rtq_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +rtq_var_store(unsigned int *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtoul(p, &p, 10); + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(elevator_t *e, char *page) \ +{ \ + struct rtq_data *rtqd = e->elevator_data; \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return rtq_var_show(__data, (page)); \ +} +/* + * Adding ProcFS entry + * Step 1 (of 3): read ability + * @1: format: rtq_##name##_show + * @2: variable to make visible through procfs + * @3: convert value from jiffies to milliseconds before showing + */ +SHOW_FUNCTION(rtq_max_driver_busy_show, rtqd->max_driver_busy, 0); +SHOW_FUNCTION(rtq_wait_rt_show, rtqd->wait_rt, 0); +SHOW_FUNCTION(rtq_driver_busy_show, atomic_read(&rtqd->driver_busy), 0); +SHOW_FUNCTION(rtq_resp_time_show, rtqd->response_time, 0); +SHOW_FUNCTION(rtq_reserve_rt_show, rtqd->reserve_rt, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ +{ \ + struct rtq_data *rtqd = e->elevator_data; \ + unsigned int __data; \ + int ret = rtq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +/* + * Adding ProcFS entry + * Step 2 (of 3): write ability + * @1: format: rtq_##name##_store + * @2: variable to make visible through procfs + * @3: min valid value + * @4: max 
valid value + * @5: convert value from milliseconds to jiffies before storing + */ +STORE_FUNCTION(rtq_max_driver_busy_store, &rtqd->max_driver_busy, 0, INT_MAX, 0); +// TODO: this should not be adjustable +STORE_FUNCTION(rtq_wait_rt_store, &rtqd->wait_rt, 0, 1, 0); +STORE_FUNCTION(rtq_driver_busy_store, &rtqd->sys_temp_variable, 0, 1, 0); +STORE_FUNCTION(rtq_resp_time_store, &rtqd->response_time, 0, INT_MAX, 0); +STORE_FUNCTION(rtq_reserve_rt_store, &rtqd->reserve_rt, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define RTQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, rtq_##name##_show, rtq_##name##_store) + +/* + * Adding ProcFS entry + * Step 3 (of 3): + */ +static struct elv_fs_entry rtq_attrs[] = { + RTQ_ATTR(max_driver_busy), + RTQ_ATTR(wait_rt), + RTQ_ATTR(driver_busy), + RTQ_ATTR(resp_time), + RTQ_ATTR(reserve_rt), + __ATTR_NULL +}; + + + +static int rtq_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + RTQ_FUNC(); + /* + * 0 - do not allow merge + * 1 - allow merge + */ + return 0; +} + +static int +rtq_merge(struct request_queue *q, struct request **req, struct bio *bio) +{ + RTQ_FUNC(); + + return ELEVATOR_NO_MERGE; +} + +static void +rtq_merged_request(struct request_queue *q, struct request *req, + int type) +{ + RTQ_FUNC(); + + RTQ_MSG(0, "ERROR!!!!!!!!!!!!!"); + RTQ_MSG(0, "request was merged, this should not happen"); +} + +static struct elevator_type elevator_rtq = { + .ops = { + .elevator_merge_fn = rtq_merge, + .elevator_merged_fn = rtq_merged_request, + .elevator_merge_req_fn = rtq_merged_requests, + .elevator_allow_merge_fn = rtq_allow_merge, + + .elevator_dispatch_fn = rtq_dispatch, + .elevator_add_req_fn = rtq_add_request, + .elevator_queue_empty_fn = rtq_queue_empty, + .elevator_former_req_fn = rtq_former_request, + .elevator_latter_req_fn = rtq_latter_request, + .elevator_init_fn = rtq_init_queue, + .elevator_exit_fn = rtq_exit_queue, + .elevator_activate_req_fn = rtq_activate_request, + .elevator_deactivate_req_fn = rtq_deactivate_request, + .elevator_completed_req_fn = rtq_completed_request, + .elevator_set_req_fn = rtq_set_request, + }, + .elevator_attrs = rtq_attrs, + .elevator_name = "rtq", + .elevator_owner = THIS_MODULE, +}; + +static int __init rtq_init(void) +{ + elv_register(&elevator_rtq); + + return 0; +} + +static void __exit rtq_exit(void) +{ + elv_unregister(&elevator_rtq); +} + +module_init(rtq_init); +module_exit(rtq_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RTQ scheduler");
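
Usage note (not part of the patch itself): once built with CONFIG_IOSCHED_RTQ, the scheduler registers under the name "rtq" and can be selected per device through the standard elevator interface, e.g. by writing "rtq" to /sys/block/<dev>/queue/scheduler; the rtq_attrs tunables (max_driver_busy, wait_rt, driver_busy, resp_time, reserve_rt) then appear under /sys/block/<dev>/queue/iosched/. The sketch below is a minimal, illustrative userspace program, not part of the diff, and assumes the RTQ scheduler is active on the device being accessed and that the caller has the privilege normally required to enter the RT I/O class. It shows how a process would mark its I/O as real-time with the ioprio_set() syscall so that rtq_get_queue_tsk() routes its requests to the IOPRIO_CLASS_RT queue; the constants mirror include/linux/ioprio.h, and the priority data value of 0 is arbitrary since this scheduler only looks at the class.

/*
 * Illustrative userspace sketch (not part of this patch): put the calling
 * process into the real-time I/O class so that its block requests are
 * queued on the scheduler's IOPRIO_CLASS_RT rtq_queue. ioprio_set() is
 * invoked through syscall(); the constants below mirror
 * include/linux/ioprio.h. Entering the RT class normally requires
 * sufficient privilege (e.g. CAP_SYS_ADMIN).
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define IOPRIO_WHO_PROCESS	1
#define IOPRIO_CLASS_RT		1
#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))

int main(void)
{
	/* RT class, priority data 0; rtq only inspects the class bits */
	int ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0);

	/* "who" == 0 means the calling process */
	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, ioprio) < 0) {
		perror("ioprio_set");
		return 1;
	}

	/*
	 * I/O submitted from here on carries IOPRIO_CLASS_RT in the task's
	 * io_context, so rtq_set_request()/rtq_get_queue_tsk() place it on
	 * the real-time queue and stamp a deadline via rtq_deadline_stamp().
	 */
	return 0;
}

With the default tunables (wait_rt off, max_driver_busy of 1), such requests are dispatched ahead of best-effort traffic and are deadline-tracked against resp_time (75 ms by default).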