
From: David Mosberger <davidm@napali.hpl.hp.com>

Basically, what the patch does is provide two hooks such that platforms
(and subplatforms) can provide time-interpolation in a way that guarantees
that two causally related gettimeofday() calls will never see time going
backwards (unless there is a settimeofday() call, of course).

There is some evidence that the current scheme does work: we use it on ia64
for cycle-counter-based interpolation, and the SGI folks use it with a
chipset-based high-performance counter.


It seems like enough platforms do this sort of thing to provide _some_
support in the core, especially because it's rather tricky to guarantee
that time never goes backwards (short of a settimeofday, of course).

This patch is based on something Jes Sorensen wrote for the SGI Itanium 2
platform (which has a chipset-internal high-res clock).  I adapted it so it
can be used for cycle-counter interpolation also.  The net effect is that
"last_time_offset" can be removed completely from the kernel.

The basic idea behind the patch is simply: every time you advance xtime by
N nanoseconds, you call the update hook, time_interpolator_update(N).  Every
time the time gets set (i.e., a discontinuity is OK), the reset hook,
time_interpolator_reset(), is called.

DESC
make timer interpolation patch compile
EDESC

"arguments provided to macro `nop'"


 include/linux/timex.h |  102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/time.c         |    8 +--
 kernel/timer.c        |   94 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 192 insertions(+), 12 deletions(-)

diff -puN include/linux/timex.h~time-interpolation-infrastructure include/linux/timex.h
--- 25/include/linux/timex.h~time-interpolation-infrastructure	2003-05-28 18:29:30.000000000 -0700
+++ 25-akpm/include/linux/timex.h	2003-05-28 18:29:30.000000000 -0700
@@ -51,6 +51,9 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
+#include <linux/config.h>
+#include <linux/compiler.h>
+
 #include <asm/param.h>
 
 /*
@@ -310,6 +313,105 @@ extern long pps_calcnt;		/* calibration 
 extern long pps_errcnt;		/* calibration errors */
 extern long pps_stbcnt;		/* stability limit exceeded */
 
+#ifdef CONFIG_TIME_INTERPOLATION
+
+struct time_interpolator {	/* one registrable set of time-interpolation hooks */
+	/* cache-hot stuff first: */
+	unsigned long (*get_offset) (void);	/* its result is returned by time_interpolator_get_offset() */
+	void (*update) (long);	/* invoked by time_interpolator_update() with its delta_nsec */
+	void (*reset) (void);	/* invoked by time_interpolator_reset() */
+
+	/* cache-cold stuff follows here: */
+	struct time_interpolator *next;	/* link in the list of registered interpolators */
+	unsigned long frequency;	/* frequency in counts/second */
+	long drift;			/* drift in parts-per-million (or -1) */
+};
+
+extern volatile unsigned long last_nsec_offset;
+#ifndef __HAVE_ARCH_CMPXCHG
+extern spinlock_t last_nsec_offset_lock;	/* serializes last_nsec_offset updates when there is no cmpxchg */
+#endif
+extern struct time_interpolator *time_interpolator;
+
+extern void register_time_interpolator(struct time_interpolator *);
+extern void unregister_time_interpolator(struct time_interpolator *);
+
+/* Lower last_nsec_offset by delta_nsec (never below 0), then run the selected interpolator's update hook.  Called with xtime WRITE-lock acquired.  */
+static inline void
+time_interpolator_update(long delta_nsec)
+{
+	struct time_interpolator *ti = time_interpolator;
+
+	if (last_nsec_offset > 0) {	/* nothing to subtract from when it is already 0 */
+#ifdef __HAVE_ARCH_CMPXCHG
+		unsigned long new, old;
+
+		do {
+			old = last_nsec_offset;
+			if (old > delta_nsec)	/* unsigned compare: a negative delta_nsec converts to a huge value, giving new = 0 */
+				new = old - delta_nsec;
+			else
+				new = 0;	/* clamp instead of wrapping below zero */
+		} while (cmpxchg(&last_nsec_offset, old, new) != old);	/* retry if the value changed since it was read */
+#else
+		/*
+		 * This really hurts, because it serializes gettimeofday(), but without an
+		 * atomic single-word compare-and-exchange, there isn't all that much else
+		 * we can do.
+		 */
+		spin_lock(&last_nsec_offset_lock);
+		{
+			last_nsec_offset -= min(last_nsec_offset, delta_nsec);	/* subtract at most the current offset */
+		}
+		spin_unlock(&last_nsec_offset_lock);
+#endif
+	}
+
+	if (ti)	/* a NULL pointer means no interpolator is selected */
+		(*ti->update)(delta_nsec);
+}
+
+/* Zero last_nsec_offset and run the selected interpolator's reset hook, if any.  Called with xtime WRITE-lock acquired.  */
+static inline void
+time_interpolator_reset(void)
+{
+	struct time_interpolator *ti = time_interpolator;
+
+	last_nsec_offset = 0;
+	if (ti)	/* a NULL pointer means no interpolator is selected */
+		(*ti->reset)();
+}
+
+/* Return the selected interpolator's get_offset() result, or last_nsec_offset when none is selected.  Called with xtime READ-lock acquired.  */
+static inline unsigned long
+time_interpolator_get_offset(void)
+{
+	struct time_interpolator *ti = time_interpolator;
+	if (ti)
+		return (*ti->get_offset)();
+	return last_nsec_offset;	/* fallback when time_interpolator is NULL */
+}
+
+#else /* !CONFIG_TIME_INTERPOLATION */
+
+static inline void	/* empty stub used when CONFIG_TIME_INTERPOLATION is not defined */
+time_interpolator_update(long delta_nsec)
+{
+}
+
+static inline void	/* empty stub used when CONFIG_TIME_INTERPOLATION is not defined */
+time_interpolator_reset(void)
+{
+}
+
+static inline unsigned long	/* stub used when CONFIG_TIME_INTERPOLATION is not defined */
+time_interpolator_get_offset(void)
+{
+	return 0;	/* always reports a zero offset */
+}
+
+#endif /* !CONFIG_TIME_INTERPOLATION */
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
diff -puN kernel/time.c~time-interpolation-infrastructure kernel/time.c
--- 25/kernel/time.c~time-interpolation-infrastructure	2003-05-28 18:29:30.000000000 -0700
+++ 25-akpm/kernel/time.c	2003-05-28 18:29:30.000000000 -0700
@@ -35,8 +35,6 @@
  */
 struct timezone sys_tz;
 
-extern unsigned long last_time_offset;
-
 #if !defined(__alpha__) && !defined(__ia64__)
 
 /*
@@ -77,9 +75,10 @@ asmlinkage long sys_stime(int * tptr)
 	if (get_user(value, tptr))
 		return -EFAULT;
 	write_seqlock_irq(&xtime_lock);
+
+	time_interpolator_reset();
 	xtime.tv_sec = value;
 	xtime.tv_nsec = 0;
-	last_time_offset = 0;
 	time_adjust = 0;	/* stop active adjtime() */
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
@@ -125,7 +124,7 @@ inline static void warp_clock(void)
 {
 	write_seqlock_irq(&xtime_lock);
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
-	last_time_offset = 0;
+	time_interpolator_update(sys_tz.tz_minuteswest * 60 * NSEC_PER_SEC);
 	write_sequnlock_irq(&xtime_lock);
 }
 
@@ -381,7 +380,6 @@ leave:	if ((time_status & (STA_UNSYNC|ST
 	txc->calcnt	   = pps_calcnt;
 	txc->errcnt	   = pps_errcnt;
 	txc->stbcnt	   = pps_stbcnt;
-	last_time_offset = 0;
 	write_sequnlock_irq(&xtime_lock);
 	do_gettimeofday(&txc->time);
 	return(result);
diff -puN kernel/timer.c~time-interpolation-infrastructure kernel/timer.c
--- 25/kernel/timer.c~time-interpolation-infrastructure	2003-05-28 18:29:30.000000000 -0700
+++ 25-akpm/kernel/timer.c	2003-05-28 18:29:30.000000000 -0700
@@ -517,6 +517,7 @@ static void second_overflow(void)
 	if (xtime.tv_sec % 86400 == 0) {
 	    xtime.tv_sec--;
 	    wall_to_monotonic.tv_sec++;
+	    time_interpolator_update(-NSEC_PER_SEC);
 	    time_state = TIME_OOP;
 	    clock_was_set();
 	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
@@ -527,6 +528,7 @@ static void second_overflow(void)
 	if ((xtime.tv_sec + 1) % 86400 == 0) {
 	    xtime.tv_sec++;
 	    wall_to_monotonic.tv_sec--;
+	    time_interpolator_update(NSEC_PER_SEC);
 	    time_state = TIME_WAIT;
 	    clock_was_set();
 	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
@@ -605,7 +607,7 @@ static void second_overflow(void)
 /* in the NTP reference this is called "hardclock()" */
 static void update_wall_time_one_tick(void)
 {
-	long time_adjust_step;
+	long time_adjust_step, delta_nsec;
 
 	if ( (time_adjust_step = time_adjust) != 0 ) {
 	    /* We are doing an adjtime thing. 
@@ -621,11 +623,11 @@ static void update_wall_time_one_tick(vo
 		time_adjust_step = tickadj;
 	     else if (time_adjust < -tickadj)
 		time_adjust_step = -tickadj;
-	     
+
 	    /* Reduce by this step the amount of time left  */
 	    time_adjust -= time_adjust_step;
 	}
-	xtime.tv_nsec += tick_nsec + time_adjust_step * 1000;
+	delta_nsec = tick_nsec + time_adjust_step * 1000;
 	/*
 	 * Advance the phase, once it gets to one microsecond, then
 	 * advance the tick more.
@@ -634,13 +636,15 @@ static void update_wall_time_one_tick(vo
 	if (time_phase <= -FINEUSEC) {
 		long ltemp = -time_phase >> (SHIFT_SCALE - 10);
 		time_phase += ltemp << (SHIFT_SCALE - 10);
-		xtime.tv_nsec -= ltemp;
+		delta_nsec -= ltemp;
 	}
 	else if (time_phase >= FINEUSEC) {
 		long ltemp = time_phase >> (SHIFT_SCALE - 10);
 		time_phase -= ltemp << (SHIFT_SCALE - 10);
-		xtime.tv_nsec += ltemp;
+		delta_nsec += ltemp;
 	}
+	xtime.tv_nsec += delta_nsec;
+	time_interpolator_update(delta_nsec);
 }
 
 /*
@@ -660,6 +664,7 @@ static void update_wall_time(unsigned lo
 	if (xtime.tv_nsec >= 1000000000) {
 	    xtime.tv_nsec -= 1000000000;
 	    xtime.tv_sec++;
+	    time_interpolator_update(NSEC_PER_SEC);
 	    second_overflow();
 	}
 }
@@ -777,7 +782,6 @@ unsigned long wall_jiffies = INITIAL_JIF
 #ifndef ARCH_HAVE_XTIME_LOCK
 seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
 #endif
-unsigned long last_time_offset;
 
 /*
  * This function runs timers and the timer-tq in bottom half context.
@@ -811,7 +815,6 @@ static inline void update_times(void)
 		wall_jiffies += ticks;
 		update_wall_time(ticks);
 	}
-	last_time_offset = 0;
 	calc_load(ticks);
 }
   
@@ -1221,3 +1224,80 @@ void __init init_timers(void)
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
 }
+
+#ifdef CONFIG_TIME_INTERPOLATION
+
+volatile unsigned long last_nsec_offset;	/* lowered by time_interpolator_update(), zeroed by time_interpolator_reset() */
+
+struct time_interpolator *time_interpolator;	/* currently selected interpolator, or NULL if none */
+
+#ifndef __HAVE_ARCH_CMPXCHG
+spinlock_t last_nsec_offset_lock = SPIN_LOCK_UNLOCKED;	/* taken around last_nsec_offset updates when cmpxchg is unavailable */
+#endif
+
+static struct {
+	spinlock_t lock;		/* lock protecting list; held by register/unregister_time_interpolator() */
+	struct time_interpolator *list;	/* head of the list of registered interpolators, chained through ->next */
+} ti_global = {
+	.lock = SPIN_LOCK_UNLOCKED
+};
+
+static inline int	/* nonzero if 'new' should replace the currently selected time_interpolator */
+is_better_time_interpolator(struct time_interpolator *new)
+{
+	if (!time_interpolator)
+		return 1;	/* nothing selected yet, so any candidate wins */
+	return new->frequency > 2*time_interpolator->frequency	/* more than twice the current frequency, ... */
+		|| (unsigned long) new->drift < (unsigned long) time_interpolator->drift;	/* ... or lower drift; the unsigned casts make a drift of -1 compare as the largest value */
+}
+
+void	/* Add ti to the registered list and select it if is_better_time_interpolator() says so. */
+register_time_interpolator(struct time_interpolator *ti)
+{
+	spin_lock(&ti_global.lock);	/* protects ti_global.list */
+	{
+		write_seqlock_irq(&xtime_lock);	/* time_interpolator is changed under the xtime write lock */
+		{
+			if (is_better_time_interpolator(ti))
+				time_interpolator = ti;
+		}
+		write_sequnlock_irq(&xtime_lock);
+
+		ti->next = ti_global.list;	/* push ti onto the head of the list */
+		ti_global.list = ti;
+	}
+	spin_unlock(&ti_global.lock);
+}
+
+void	/* Unlink ti from the registered list; if it was the selected interpolator, pick a replacement from what remains. */
+unregister_time_interpolator(struct time_interpolator *ti)
+{
+	struct time_interpolator *curr, **prev;
+
+	spin_lock(&ti_global.lock);	/* protects ti_global.list */
+	{
+		prev = &ti_global.list;	/* prev always points at the link that leads to curr */
+		for (curr = *prev; curr; curr = curr->next) {
+			if (curr == ti) {
+				*prev = curr->next;	/* splice ti out of the list */
+				break;
+			}
+			prev = &curr->next;
+		}
+		write_seqlock_irq(&xtime_lock);	/* time_interpolator is changed under the xtime write lock */
+		{
+			if (ti == time_interpolator) {
+				/* we lost the best time-interpolator: */
+				time_interpolator = NULL;
+				/* find the next-best interpolator */
+				for (curr = ti_global.list; curr; curr = curr->next)
+					if (is_better_time_interpolator(curr))	/* compares against the choice made so far */
+						time_interpolator = curr;
+			}
+		}
+		write_sequnlock_irq(&xtime_lock);
+	}
+	spin_unlock(&ti_global.lock);
+}
+
+#endif /* CONFIG_TIME_INTERPOLATION */

_
