Re: High irq load (Re: [PATCH] i386: Selectable Frequency of the Timer Interrupt)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Linus Torvalds <[email protected]> writes:

> On Wed, 13 Jul 2005, Jan Engelhardt wrote:
> > 
> > No, some kernel code causes a triple-fault-and-reboot when the HZ is >=
> > 10KHz. Maybe the highest possible value is 8192 Hz, not sure.
> 
> Can you post the triple-fault message? It really shouldn't triple-fault, 
> although it _will_ obviously spend all time just doing timer interrupts, 
> so it shouldn't get much (if any) real work done either.
...
> There should be no conceptual "highest possible HZ", although there are 
> certainly obvious practical limits to it (both on the timer hw itself, and 
> just the fact that at some point we'll spend all time on the timer 
> interrupt and won't get anything done..)

HZ=10000 appears to work fine here after some hacks to avoid
overflows and underflows in integer arithmetic. gkrellm reports about
3-4% CPU usage when the system is idle, on a 3.07 GHz P4.

---

 Makefile                                    |    2 +-
 arch/i386/kernel/cpu/proc.c                 |    6 ++++++
 fs/nfsd/nfssvc.c                            |    2 +-
 include/linux/jiffies.h                     |    6 ++++++
 include/linux/nfsd/stats.h                  |    4 ++++
 include/linux/timex.h                       |    2 +-
 include/net/tcp.h                           |   12 +++++++++---
 init/calibrate.c                            |   21 +++++++++++++++++++++
 kernel/Kconfig.hz                           |    6 ++++++
 kernel/timer.c                              |    4 ++--
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c |    2 +-
 11 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 13
-EXTRAVERSION =-rc3
+EXTRAVERSION =-rc3-test
 NAME=Woozy Numbat
 
 # *DOCUMENTATION*
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -128,9 +128,15 @@ static int show_cpuinfo(struct seq_file 
 		     x86_cap_flags[i] != NULL )
 			seq_printf(m, " %s", x86_cap_flags[i]);
 
+#if HZ <= 5000
 	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
 		     c->loops_per_jiffy/(500000/HZ),
 		     (c->loops_per_jiffy/(5000/HZ)) % 100);
+#else
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+		     c->loops_per_jiffy/(500000/HZ),
+		     (c->loops_per_jiffy*(HZ/5000)) % 100);
+#endif
 
 	return 0;
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -160,7 +160,7 @@ update_thread_usage(int busy_threads)
 	decile = busy_threads*10/nfsdstats.th_cnt;
 	if (decile>0 && decile <= 10) {
 		diff = nfsd_last_call - prev_call;
-		if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
+		if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) 
 			nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
 		if (decile == 10)
 			nfsdstats.th_fullcnt++;
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -38,6 +38,12 @@
 # define SHIFT_HZ	9
 #elif HZ >= 768 && HZ < 1536
 # define SHIFT_HZ	10
+#elif HZ >= 1536 && HZ < 3072
+# define SHIFT_HZ	11
+#elif HZ >= 3072 && HZ < 6144
+# define SHIFT_HZ	12
+#elif HZ >= 6144 && HZ < 12288
+# define SHIFT_HZ	13
 #else
 # error You lose.
 #endif
diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h
--- a/include/linux/nfsd/stats.h
+++ b/include/linux/nfsd/stats.h
@@ -30,7 +30,11 @@ struct nfsd_stats {
 };
 
 /* thread usage wraps very million seconds (approx one fortnight) */
+#if HZ < 2048
 #define	NFSD_USAGE_WRAP	(HZ*1000000)
+#else
+#define	NFSD_USAGE_WRAP	(2048*1000000)
+#endif
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/timex.h b/include/linux/timex.h
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -90,7 +90,7 @@
  *
  * FINENSEC is 1 ns in SHIFT_UPDATE units of the time_phase variable.
  */
-#define SHIFT_SCALE 22		/* phase scale (shift) */
+#define SHIFT_SCALE 25		/* phase scale (shift) */
 #define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
 #define FINENSEC (1L << (SHIFT_SCALE - 10)) /* ~1 ns in phase units */
diff --git a/include/net/tcp.h b/include/net/tcp.h
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -486,8 +486,8 @@ static __inline__ int tcp_sk_listen_hash
    so that we select tick to get range about 4 seconds.
  */
 
-#if HZ <= 16 || HZ > 4096
-# error Unsupported: HZ <= 16 or HZ > 4096
+#if HZ <= 16
+# error Unsupported: HZ <= 16
 #elif HZ <= 32
 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
 #elif HZ <= 64
@@ -502,8 +502,14 @@ static __inline__ int tcp_sk_listen_hash
 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
 #elif HZ <= 2048
 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
-#else
+#elif HZ <= 4096
 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
+#elif HZ <= 8192
+# define TCP_TW_RECYCLE_TICK (13+2-TCP_TW_RECYCLE_SLOTS_LOG)
+#elif HZ <= 16384
+# define TCP_TW_RECYCLE_TICK (14+2-TCP_TW_RECYCLE_SLOTS_LOG)
+#else
+# error Unsupported: HZ > 16384
 #endif
 /*
  *	TCP option
diff --git a/init/calibrate.c b/init/calibrate.c
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -119,16 +119,30 @@ void __devinit calibrate_delay(void)
 
 	if (preset_lpj) {
 		loops_per_jiffy = preset_lpj;
+#if HZ <= 5000
 		printk("Calibrating delay loop (skipped)... "
 			"%lu.%02lu BogoMIPS preset\n",
 			loops_per_jiffy/(500000/HZ),
 			(loops_per_jiffy/(5000/HZ)) % 100);
+#else
+		printk("Calibrating delay loop (skipped)... "
+			"%lu.%02lu BogoMIPS preset\n",
+			loops_per_jiffy/(500000/HZ),
+			(loops_per_jiffy*(HZ/5000)) % 100);
+#endif
 	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
 		printk("Calibrating delay using timer specific routine.. ");
+#if HZ <= 5000
 		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
 			loops_per_jiffy/(500000/HZ),
 			(loops_per_jiffy/(5000/HZ)) % 100,
 			loops_per_jiffy);
+#else
+		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
+			loops_per_jiffy/(500000/HZ),
+			(loops_per_jiffy*(HZ/5000)) % 100,
+			loops_per_jiffy);
+#endif
 	} else {
 		loops_per_jiffy = (1<<12);
 
@@ -164,10 +178,17 @@ void __devinit calibrate_delay(void)
 		}
 
 		/* Round the value and print it */
+#if HZ <= 5000
 		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
 			loops_per_jiffy/(500000/HZ),
 			(loops_per_jiffy/(5000/HZ)) % 100,
 			loops_per_jiffy);
+#else
+		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
+			loops_per_jiffy/(500000/HZ),
+			(loops_per_jiffy*(HZ/5000)) % 100,
+			loops_per_jiffy);
+#endif
 	}
 
 }
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -36,6 +36,11 @@ choice
 	 1000 HZ is the preferred choice for desktop systems and other
 	 systems requiring fast interactive responses to events.
 
+	config HZ_10000
+		bool "10000 HZ"
+	help
+	 10000 HZ is for testing only.
+
 endchoice
 
 config HZ
@@ -43,4 +48,5 @@ config HZ
 	default 100 if HZ_100
 	default 250 if HZ_250
 	default 1000 if HZ_1000
+	default 10000 if HZ_10000
 
diff --git a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -710,7 +710,7 @@ static void second_overflow(void)
 	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 	time_offset += ltemp;
-	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); 
     } else {
 	ltemp = time_offset;
 	if (!(time_status & STA_FLL))
@@ -718,7 +718,7 @@ static void second_overflow(void)
 	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 	time_offset -= ltemp;
-	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); 
     }
 
     /*
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -87,7 +87,7 @@ static const char *tcp_conntrack_names[]
 
 unsigned long ip_ct_tcp_timeout_syn_sent =      2 MINS;
 unsigned long ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned long ip_ct_tcp_timeout_established =   5 DAYS;
+unsigned long ip_ct_tcp_timeout_established =   2 DAYS;
 unsigned long ip_ct_tcp_timeout_fin_wait =      2 MINS;
 unsigned long ip_ct_tcp_timeout_close_wait =   60 SECS;
 unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;

-- 
Peter Osterlund - [email protected]
http://web.telia.com/~u89404340
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux