[patch 7/9] Adapt the time initialization code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch needs to:
 - set up RDTSCP on each CPU, if the instruction is available
 - decide which hardware timer to use as the Master Timer (MT): the PM timer or HPET
 - decide what level of TSC->MT approximation to use
   - none (as slow as the hardware MT can get) -- "notsc" option
   - strictly monotonic (can't be done in a vsyscall) -- default
   - unguaranteed monotonicity (very fast, vsyscall) -- "nomonotonic" option

Signed-off-by: Jiri Bohac <[email protected]>

Index: linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/smpboot.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
@@ -318,6 +318,8 @@ static inline void set_cpu_sibling_map(i
 	}
 }
 
+extern void time_initialize_cpu(void *info);	/* defined in time.c */
+
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
  */
@@ -345,6 +347,7 @@ void __cpuinit start_secondary(void)
 		enable_NMI_through_LVT0(NULL);
 		enable_8259A_irq(0);
 	}
+	time_initialize_cpu(NULL);	/* the info argument is not read */
 
 	enable_APIC_timer();
 
@@ -963,6 +966,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	return err;
 }
 
+extern void time_init_gtod(void);
+
 /*
  * Finish the SMP boot.
  */
Index: linux-2.6.20-rc5/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/time.c
@@ -968,6 +968,16 @@ static struct irqaction irq0 = {
 	timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
 };
 
+/* With RDTSCP present, hand this CPU's number to write_rdtscp_aux(). */
+static void time_init_rdtsc(void)
+{
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) {
+		int cpu = smp_processor_id();
+		printk(KERN_INFO "Initializing RDTSCP feature on cpu %d.\n", cpu);
+		write_rdtscp_aux(cpu);
+	}
+}
+
 void __init time_init(void)
 {
 	if (nohpet)
@@ -979,27 +989,40 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 	                        -xtime.tv_sec, -xtime.tv_nsec);
 
-	if (!hpet_init())
-                vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
-	else
-		vxtime.hpet_address = 0;
+	/* decide what to use as Master Timer: HPET or PM? */
+	if (!hpet_init()) {
+		vxtime.mt_q = (hpet_period << 32) / FSEC_PER_NSEC;
+		mt_per_tick = hpet_use_timer ?
+			hpet_tick : LATCH * 12;
+		read_master_timer = read_master_timer_hpet;
+		timename = "HPET";
+	} else
+	if (pmtmr_ioport) {
+		vxtime.mt_q = (NSEC_PER_SEC << 32) / 916363636UL;
+		mt_per_tick = (LATCH * 3) << 8;
+		read_master_timer = read_master_timer_pm;
+		timename = "PM timer";
+	} else
+		panic("No suitable Master Timer found.\n");
 
-	if (hpet_use_timer) {
-		/* set tick_nsec to use the proper rate for HPET */
-	  	tick_nsec = TICK_NSEC_HPET;
+	/* use PIT as main timer interrupt source if we can't use HPET */
+	if (hpet_use_timer)
 		cpu_khz = hpet_calibrate_tsc();
-		timename = "HPET";
-	} else {
+	else {
 		pit_init();
 		cpu_khz = pit_calibrate_tsc();
-		timename = "PIT";
 	}
 
-	vxtime.mode = VXTIME_TSC;
-	vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
-	vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
-	vxtime.last_tsc = get_cycles_sync();
-	set_cyc2ns_scale(cpu_khz);
+	/* initialize the master timer */
+	set_master_timer64(0);
+
+	/* initialize the timekeeping variables of the boot CPU */
+	vxtime.cpu[0].tsc_last = get_cycles_sync();
+	vxtime.cpu[0].mt_last = vxtime.cpu[0].mt_base = 0;
+	vxtime.cpu[0].tsc_slope = vxtime.cpu[0].tsc_slope_avg = (((USEC_PER_SEC << 32) / vxtime.mt_q) << TSC_SLOPE_SCALE) / cpu_khz;
+	time_init_rdtsc();
+
+	vxtime.mode = VXTIME_TSCS; /* not sure yet if CPUs have synced TSCs */
 	setup_irq(0, &irq0);
 
 #ifndef CONFIG_SMP
@@ -1037,28 +1060,71 @@ __cpuinit int unsynchronized_tsc(void)
  */
 void time_init_gtod(void)
 {
-	char *timetype;
-
-	if (unsynchronized_tsc())
-		notsc = 1;
+	const char *tsc_method;
 
  	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
 		vgetcpu_mode = VGETCPU_RDTSCP;
 	else
 		vgetcpu_mode = VGETCPU_LSL;
 
-		timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
+	/*
+	 * Pick vxtime.mode from the "notsc"/"nomonotonic" flags, RDTSCP
+	 * availability and (on SMP) whether the TSCs are synchronized.
+	 */
+	if (notsc) {
+		tsc_method = "nothing";
+		vxtime.mode = VXTIME_MT;
+	} else if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) {
+		if (nomonotonic) {
+			tsc_method = "RDTSCP";
+			vxtime.mode = VXTIME_TSCP;
+		} else {
+			tsc_method = "RDTSCM (syscall)";
+			vxtime.mode = VXTIME_TSCM;
+		}
+	} else {
+		tsc_method = "RDTSC";
 		vxtime.mode = VXTIME_TSC;
+#ifdef CONFIG_SMP
+		if (unsynchronized_tsc()) {
+			if (nomonotonic) {
+				tsc_method = "RDTSC (syscall)";
+				vxtime.mode = VXTIME_TSCS;
+			} else {
+				tsc_method = "RDTSCM (syscall)";
+				vxtime.mode = VXTIME_TSCM;
+			}
+		}
+#endif
+	}
 
-	printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
-	       vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
+	/* Log the master timer, TSC method, vsyscall state and IRQ source. */
+	printk(KERN_INFO "time.c: Using %s as master timer, %s for time caching; vsyscall %s.\n",
+	       timename, tsc_method, sysctl_vsyscall ? "enabled" : "disabled");
+	printk(KERN_INFO "time.c: Using %s as interrupt source.\n",
+	       hpet_use_timer ? "HPET" : "PIT");
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 		cpu_khz / 1000, cpu_khz % 1000);
-	vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
-	vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
-	vxtime.last_tsc = get_cycles_sync();
+}
 
-	set_cyc2ns_scale(cpu_khz);
+/*
+ * Per-CPU timekeeping setup for non-boot CPUs: under xtime_lock, snapshot
+ * the TSC and the master timer and copy both slopes from CPU 0, then run
+ * time_init_rdtsc().  The info argument is not read.
+ */
+void time_initialize_cpu(void *info)
+{
+	int this_cpu = smp_processor_id();
+	unsigned long flags;
+	u64 now;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	now = get_master_timer64();
+	vxtime.cpu[this_cpu].tsc_last = get_cycles_sync();
+	vxtime.cpu[this_cpu].mt_last = vxtime.cpu[this_cpu].mt_base = now;
+	vxtime.cpu[this_cpu].tsc_slope = vxtime.cpu[0].tsc_slope;
+	vxtime.cpu[this_cpu].tsc_slope_avg = vxtime.cpu[0].tsc_slope_avg;
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	time_init_rdtsc();
 }
 
 __setup("report_lost_ticks", time_setup);
@@ -1098,19 +1164,38 @@ static int timer_resume(struct sys_devic
 		sleep_length = 0;
 		ctime = sleep_start;
 	}
-	if (vxtime.hpet_address)
+
+	write_seqlock_irqsave(&xtime_lock,flags);
+
+	/* reenable the Master Timer */
+	if (read_master_timer == read_master_timer_hpet || hpet_use_timer)
 		hpet_reenable();
+	/* reenable PIT if used as main timer interrupt source */
 	else
 		i8254_timer_resume();
 
 	sec = ctime + clock_cmos_diff;
-	write_seqlock_irqsave(&xtime_lock,flags);
 	xtime.tv_sec = sec;
 	xtime.tv_nsec = 0;
-		vxtime.last_tsc = get_cycles_sync();
-	write_sequnlock_irqrestore(&xtime_lock,flags);
+
+	/* re-initialize the master timer */
+	add_master_timer64(sleep_length * mt_per_tick);
+	vxtime.mt_wall += sleep_length * mt_per_tick;
+
 	jiffies += sleep_length;
-	monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
+
+	/* re-initialize the timekeeping variables of the boot CPU */
+	vxtime.cpu[0].tsc_last = get_cycles_sync();
+	vxtime.cpu[0].mt_last = vxtime.cpu[0].mt_base = get_master_timer64();
+	/* FIXME: what speed does the cpu really start at; I doubt cpu_khz is right at this point ??!!!
+	   And what speed do the non-boot cpus start at? Their timekeeping variables will probably be set wrong
+	   by copying from CPU 0 in time_initialize_cpu(); Not a great deal, as they will be synced somehow,
+	   but not exactly nice -JB */
+	vxtime.cpu[0].tsc_slope = vxtime.cpu[0].tsc_slope_avg =
+		(((USEC_PER_SEC << 32) / vxtime.mt_q) << TSC_SLOPE_SCALE) / cpu_khz;
+
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	
 	touch_softlockup_watchdog();
 	return 0;
 }
@@ -1402,3 +1487,11 @@ int __init notsc_setup(char *s)
 }
 
 __setup("notsc", notsc_setup);
+
+int __init nomonotonic_setup(char *s)
+{
+	nomonotonic = 1;	/* just raise the flag; s is not examined */
+	return 1;
+}
+
+__setup("nomonotonic", nomonotonic_setup);
Index: linux-2.6.20-rc5/include/linux/time.h
===================================================================
--- linux-2.6.20-rc5.orig/include/linux/time.h
+++ linux-2.6.20-rc5/include/linux/time.h
@@ -31,6 +31,7 @@ struct timezone {
 #define MSEC_PER_SEC	1000L
 #define USEC_PER_MSEC	1000L
 #define NSEC_PER_USEC	1000L
+#define FSEC_PER_NSEC	1000000L	/* femtoseconds per nanosecond */
 #define NSEC_PER_MSEC	1000000L
 #define USEC_PER_SEC	1000000L
 #define NSEC_PER_SEC	1000000000L

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux