Re: 2.6.17-rc5-mm2 — Linux Kernel

On Thu, 01 Jun 2006 11:30:33 +0200
Mike Galbraith <[email protected]> wrote:

> On Thu, 2006-06-01 at 01:48 -0700, Andrew Morton wrote:
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.17-rc5/2.6.17-rc5-mm2/
> > 
> > 
> > - A cfq bug was fixed in mainline, so the git-cfq tree has been restored.
> 
> I put the fix for slab corruption into mm1, and it did indeed cure that.
> However, if I add git-cfq.patch, my box still explodes.

OK, thanks.  I put a revert patch into the hot-fixes directory.


--- devel/block/cfq-iosched.c~revert-git-cfq	2006-06-01 02:46:39.000000000 -0700
+++ devel-akpm/block/cfq-iosched.c	2006-06-01 02:46:39.000000000 -0700
@@ -103,9 +103,8 @@ struct cfq_data {
 	 * rr list of queues with requests and the count of them
 	 */
 	struct list_head rr_list[CFQ_PRIO_LISTS];
+	struct list_head busy_rr;
 	struct list_head cur_rr;
-	unsigned short cur_prio;
-
 	struct list_head idle_rr;
 	unsigned int busy_queues;
 
@@ -141,6 +140,7 @@ struct cfq_data {
 
 	struct cfq_queue *active_queue;
 	struct cfq_io_context *active_cic;
+	int cur_prio, cur_end_prio;
 	unsigned int dispatch_slice;
 
 	struct timer_list idle_class_timer;
@@ -199,13 +199,8 @@ struct cfq_queue {
 	int on_dispatch[2];
 
 	/* io prio of this group */
-	unsigned short ioprio_class, ioprio;
-
-	/* current dynamic stair priority */
-	unsigned short dyn_ioprio;
-
-	/* same as real ioprio, except if queue has been elevated */
-	unsigned short org_ioprio_class, org_ioprio;
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
 
 	/* various state flags, see below */
 	unsigned int flags;
@@ -484,13 +479,25 @@ static void cfq_resort_rr_list(struct cf
 		list = &cfqd->cur_rr;
 	else if (cfq_class_idle(cfqq))
 		list = &cfqd->idle_rr;
-	else
-		list = &cfqd->rr_list[cfqq->dyn_ioprio];
+	else {
+		/*
+		 * if cfqq has requests in flight, don't allow it to be
+		 * found in cfq_set_active_queue before it has finished them.
+		 * this is done to increase fairness between a process that
+		 * has lots of io pending vs one that only generates one
+		 * sporadically or synchronously
+		 */
+		if (cfq_cfqq_dispatched(cfqq))
+			list = &cfqd->busy_rr;
+		else
+			list = &cfqd->rr_list[cfqq->ioprio];
+	}
 
 	/*
-	 * if queue was preempted, just add to front to be fair.
+	 * if queue was preempted, just add to front to be fair. busy_rr
+	 * isn't sorted.
 	 */
-	if (preempted) {
+	if (preempted || list == &cfqd->busy_rr) {
 		list_add(&cfqq->cfq_list, list);
 		return;
 	}
@@ -502,8 +509,6 @@ static void cfq_resort_rr_list(struct cf
 	while ((entry = entry->prev) != list) {
 		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
 
-		if (__cfqq->ioprio < cfqq->ioprio)
-			break;
 		if (!__cfqq->service_last)
 			break;
 		if (time_before(__cfqq->service_last, cfqq->service_last))
@@ -729,49 +734,23 @@ cfq_merged_requests(request_queue_t *q, 
 	cfq_remove_request(next);
 }
 
-/*
- * Scale schedule slice based on io priority. Use the sync time slice only
- * if a queue is marked sync and has sync io queued. A sync queue with async
- * io only, should not get full sync slice length.
- */
-static inline int
-cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
-	unsigned short prio = cfqq->dyn_ioprio;
-
-	WARN_ON(prio >= IOPRIO_BE_NR);
-
-	if (cfq_class_rt(cfqq))
-		prio = 0;
-
-	return base_slice + (base_slice / CFQ_SLICE_SCALE * (4 - prio));
-}
-
 static inline void
-cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
-}
-
-static inline int
-cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	const int base_rq = cfqd->cfq_slice_async_rq;
-	unsigned short prio = cfqq->dyn_ioprio;
-
-	WARN_ON(cfqq->dyn_ioprio >= IOPRIO_BE_NR);
-
-	if (cfq_class_rt(cfqq))
-		prio = 0;
+	if (cfqq) {
+		/*
+		 * stop potential idle class queues waiting service
+		 */
+		del_timer(&cfqd->idle_class_timer);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - prio));
-}
+		cfqq->slice_start = jiffies;
+		cfqq->slice_end = 0;
+		cfqq->slice_left = 0;
+		cfq_clear_cfqq_must_alloc_slice(cfqq);
+		cfq_clear_cfqq_fifo_expire(cfqq);
+	}
 
-static inline void cfq_prio_inc(unsigned short *p, unsigned int low_p)
-{
-	if (++(*p) == CFQ_PRIO_LISTS)
-		*p = low_p;
+	cfqd->active_queue = cfqq;
 }
 
 /*
@@ -803,8 +782,6 @@ __cfq_slice_expired(struct cfq_data *cfq
 	else
 		cfqq->slice_left = 0;
 
-	cfq_prio_inc(&cfqq->dyn_ioprio, cfqq->ioprio);
-
 	if (cfq_cfqq_on_rr(cfqq))
 		cfq_resort_rr_list(cfqq, preempted);
 
@@ -827,58 +804,73 @@ static inline void cfq_slice_expired(str
 		__cfq_slice_expired(cfqd, cfqq, preempted);
 }
 
-static struct cfq_queue *cfq_get_next_cfqq(struct cfq_data *cfqd)
+/*
+ * 0
+ * 0,1
+ * 0,1,2
+ * 0,1,2,3
+ * 0,1,2,3,4
+ * 0,1,2,3,4,5
+ * 0,1,2,3,4,5,6
+ * 0,1,2,3,4,5,6,7
+ */
+static int cfq_get_next_prio_level(struct cfq_data *cfqd)
 {
-	if (!cfqd->busy_queues)
-		return NULL;
-
-	if (list_empty(&cfqd->cur_rr)) {
-		unsigned short prio = cfqd->cur_prio;
+	int prio, wrap;
 
-		do {
-			struct list_head *list = &cfqd->rr_list[prio];
+	prio = -1;
+	wrap = 0;
+	do {
+		int p;
 
-			if (!list_empty(list)) {
-				list_splice_init(list, &cfqd->cur_rr);
+		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
+			if (!list_empty(&cfqd->rr_list[p])) {
+				prio = p;
 				break;
 			}
+		}
 
-			cfq_prio_inc(&prio, 0);
-		} while (prio != cfqd->cur_prio);
+		if (prio != -1)
+			break;
+		cfqd->cur_prio = 0;
+		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+			cfqd->cur_end_prio = 0;
+			if (wrap)
+				break;
+			wrap = 1;
+		}
+	} while (1);
 
-		cfq_prio_inc(&cfqd->cur_prio, 0);
-	}
+	if (unlikely(prio == -1))
+		return -1;
 
-	if (!list_empty(&cfqd->cur_rr));
-		return list_entry_cfqq(cfqd->cur_rr.next);
+	BUG_ON(prio >= CFQ_PRIO_LISTS);
 
-	return NULL;
-}
+	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
 
-static inline void
-__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	if (cfqq) {
-		WARN_ON(RB_EMPTY(&cfqq->sort_list));
-
-		/*
-		 * stop potential idle class queues waiting service
-		 */
-		del_timer(&cfqd->idle_class_timer);
-
-		cfqq->slice_start = jiffies;
-		cfqq->slice_end = 0;
-		cfqq->slice_left = 0;
-		cfq_clear_cfqq_must_alloc_slice(cfqq);
-		cfq_clear_cfqq_fifo_expire(cfqq);
+	cfqd->cur_prio = prio + 1;
+	if (cfqd->cur_prio > cfqd->cur_end_prio) {
+		cfqd->cur_end_prio = cfqd->cur_prio;
+		cfqd->cur_prio = 0;
+	}
+	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+		cfqd->cur_prio = 0;
+		cfqd->cur_end_prio = 0;
 	}
 
-	cfqd->active_queue = cfqq;
+	return prio;
 }
 
 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
-	struct cfq_queue *cfqq = cfq_get_next_cfqq(cfqd);
+	struct cfq_queue *cfqq = NULL;
+
+	/*
+	 * if current list is non-empty, grab first entry. if it is empty,
+	 * get next prio level and grab first entry then if any are spliced
+	 */
+	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
 
 	/*
 	 * if we have idle queues and no rt or be queues had pending
@@ -976,6 +968,37 @@ static inline struct cfq_rq *cfq_check_f
 }
 
 /*
+ * Scale schedule slice based on io priority. Use the sync time slice only
+ * if a queue is marked sync and has sync io queued. A sync queue with async
+ * io only, should not get full sync slice length.
+ */
+static inline int
+cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
+
+	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+
+	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
+}
+
+static inline void
+cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+}
+
+static inline int
+cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	const int base_rq = cfqd->cfq_slice_async_rq;
+
+	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+
+	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+}
+
+/*
  * get next queue for service
  */
 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
@@ -1092,6 +1115,7 @@ cfq_forced_dispatch(struct cfq_data *cfq
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
 
+	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr);
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
 
@@ -1331,11 +1355,6 @@ static void cfq_init_prio_data(struct cf
 	cfqq->org_ioprio = cfqq->ioprio;
 	cfqq->org_ioprio_class = cfqq->ioprio_class;
 
-	/*
-	 * start priority
-	 */
-	cfqq->dyn_ioprio = cfqq->ioprio;
-
 	if (cfq_cfqq_on_rr(cfqq))
 		cfq_resort_rr_list(cfqq, 0);
 
@@ -2210,6 +2229,7 @@ static int cfq_init_queue(request_queue_
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		INIT_LIST_HEAD(&cfqd->rr_list[i]);
 
+	INIT_LIST_HEAD(&cfqd->busy_rr);
 	INIT_LIST_HEAD(&cfqd->cur_rr);
 	INIT_LIST_HEAD(&cfqd->idle_rr);
 	INIT_LIST_HEAD(&cfqd->empty_list);
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
References:
- 2.6.17-rc5-mm2
  - From: Andrew Morton <[email protected]>
- Re: 2.6.17-rc5-mm2
  - From: Mike Galbraith <[email protected]>
Prev by Date: Re: 2.6.17-rc5-mm2
Next by Date: Re: XFS related hang (was Re: How to send a break? - dump from frozen 64bit linux)
Previous by thread: Re: 2.6.17-rc5-mm2
Next by thread: Re: 2.6.17-rc5-mm2
Index(es):
- Date
- Thread
[Index of Archives] [Kernel Newbies] [Netfilter] [Bugtraq] [Photo] [Stuff] [Gimp] [Yosemite News] [MIPS Linux] [ARM Linux] [Linux Security] [Linux RAID] [Video 4 Linux] [Linux for the blind] [Linux Resources]