

Patch from Nick Piggin <piggin@cyberone.com.au>

Implement application thinktime monitoring, fix HZ=1000 assumptions


 drivers/block/as-iosched.c |   52 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 52 insertions(+)

diff -puN drivers/block/as-iosched.c~as-thinktime drivers/block/as-iosched.c
--- 25/drivers/block/as-iosched.c~as-thinktime	2003-03-08 00:02:21.000000000 -0800
+++ 25-akpm/drivers/block/as-iosched.c	2003-03-08 00:02:21.000000000 -0800
@@ -36,6 +36,7 @@ struct ant_stats {
 	int broken_by_write;
 	int exitted_tasks;
 	int queued_request;
+	int big_thinktime;
 
 	int ant_delay_hist[100];	/* milliseconds */
 
@@ -93,17 +94,26 @@ static unsigned long antic_expire = HZ /
  * to a single as_io_context may appear in multiple queues at once.
  */
 
+/* Track thinktimes of up to ~30ms: one histogram bucket per jiffy */
+#define MAX_THINKTIME (HZ/33UL)
+
 struct as_io_context {
 	atomic_t refcount;
 	pid_t pid;
 	unsigned long state;
 	atomic_t nr_queued; /* queued reads & sync writes */
 	atomic_t nr_dispatched; /* number of requests gone to the drivers */
+
+	/* IO history tracking (thinktimes are measured in jiffies) */
+	unsigned long last_end_request;	/* jiffies at last completion */
+	unsigned long thinktime[MAX_THINKTIME]; /* decaying histogram */
+	unsigned long mean_thinktime;	/* mean of the histogram */
 };
 
 /* Bits in as_io_context.state */
 enum as_io_states {
 	AS_TASK_RUNNING=0,	/* Process has not exitted */
+	AS_TASK_IORUNNING,	/* Has completed IO; last_end_request valid */
 	AS_REQ_FINISHED,	/* Set in ad->as_io_context upon completion */
 };
 
@@ -243,6 +253,8 @@ static struct as_io_context *get_as_io_c
 			ret->state = 1 << AS_TASK_RUNNING;
 			atomic_set(&ret->nr_queued, 0);
 			atomic_set(&ret->nr_dispatched, 0);
+			memset(ret->thinktime, 0, sizeof(ret->thinktime));
+			ret->mean_thinktime = 0;
 			tsk->as_io_context = ret;
 		}
 	}
@@ -476,12 +488,46 @@ as_find_arq_rb(struct as_data *ad, secto
 
 static void as_antic_waitnext(struct as_data *ad);
 
+/*
+ * as_update_iohist keeps a decaying histogram of IO thinktimes (jiffies from
+ * the last completion to this new request) and sets @aic->mean_thinktime to
+ * its weighted mean. Called when a request is queued.
+ */
+static void as_update_iohist(struct as_io_context *aic)
+{
+	unsigned i;
+	unsigned long thinktime,
+		      total = 0,
+		      num = 0;
+
+	if (aic == NULL)
+		return;
+
+	if (test_bit(AS_TASK_IORUNNING, &aic->state)) {
+		thinktime = jiffies - aic->last_end_request;
+		thinktime = min(thinktime, MAX_THINKTIME-1); /* clamp */
+		aic->thinktime[thinktime] += 256; /* fixed point: 1.0 == 1<<8 */
+
+		for (i = 0; i < MAX_THINKTIME; i++) {
+			unsigned long tt = aic->thinktime[i];
+			total += i*tt;
+			num += tt;
+
+			aic->thinktime[i] = (tt>>1) + (tt>>2); /* keep 75% */
+		}
+		/* fixed point factor cancels; num >= 256 from the new sample */
+		aic->mean_thinktime = total / num;
+	}
+}
+
 static void as_complete_arq(struct as_data *ad, struct as_rq *arq)
 {
 	if (!arq->as_io_context)
 		return;
 
+	set_bit(AS_TASK_IORUNNING, &arq->as_io_context->state);
 	set_bit(AS_REQ_FINISHED, &arq->as_io_context->state);
+	arq->as_io_context->last_end_request = jiffies;
 
 	if (ad->as_io_context == arq->as_io_context) {
 		ad->antic_start = jiffies;
@@ -509,6 +555,7 @@ static void as_add_request(struct as_dat
 	if (arq->as_io_context) {
 		atomic_inc(&arq->as_io_context->nr_queued);
 		clear_bit(AS_REQ_FINISHED, &arq->as_io_context->state);
+		as_update_iohist(arq->as_io_context);
 	}
 
 	as_add_arq_rb(ad, arq);
@@ -962,6 +1009,11 @@ static int as_can_break_anticipation(str
 		ant_stats.queued_request++;
 		return 1;
 	}
+
+	if (aic && aic->mean_thinktime > max(HZ/200, 1)) {
+		ant_stats.big_thinktime++;
+		return 1;
+	}
 	
 	return 0;
 }

_
