[patch 21/23] debugging feature: timer stats

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Thomas Gleixner <[email protected]>

add /proc/tstats support: debugging feature to profile timer expiration.
Both the starting site, process/PID and the expiration function is
captured. This allows the quick identification of timer event sources
in a system.

sample output:

 # echo 1 > /proc/tstats
 # cat /proc/tstats
 Timerstats sample period: 3.888770 s
   12,     0 swapper          hrtimer_stop_sched_tick (hrtimer_sched_tick)
   15,     1 swapper          hcd_submit_urb (rh_timer_func)
    4,   959 kedac            schedule_timeout (process_timeout)
    1,     0 swapper          page_writeback_init (wb_timer_fn)
   28,     0 swapper          hrtimer_stop_sched_tick (hrtimer_sched_tick)
   22,  2948 IRQ 4            tty_flip_buffer_push (delayed_work_timer_fn)
    3,  3100 bash             schedule_timeout (process_timeout)
    1,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
    1,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
    1,     1 swapper          neigh_table_init_no_netlink (neigh_periodic_timer)
    1,  2292 ip               __netdev_watchdog_up (dev_watchdog)
    1,    23 events/1         do_cache_clean (delayed_work_timer_fn)
 90 total events, 30.0 events/sec

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
--
 include/linux/hrtimer.h   |   44 ++++++++
 include/linux/timer.h     |   48 +++++++++
 kernel/hrtimer.c          |   26 +++++
 kernel/time/Makefile      |    3 
 kernel/time/timer_stats.c |  227 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/timer.c            |   29 +++++
 kernel/workqueue.c        |    8 +
 lib/Kconfig.debug         |   11 ++
 8 files changed, 391 insertions(+), 5 deletions(-)

Index: linux-2.6.18-mm2/include/linux/hrtimer.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/hrtimer.h	2006-09-30 01:41:18.000000000 +0200
+++ linux-2.6.18-mm2/include/linux/hrtimer.h	2006-09-30 01:41:20.000000000 +0200
@@ -74,6 +74,11 @@ struct hrtimer {
 	int				cb_mode;
 	struct list_head		cb_entry;
 #endif
+#ifdef CONFIG_TIMER_STATS
+	void				*start_site;
+	char				start_comm[16];
+	int				start_pid;
+#endif
 };
 
 /**
@@ -258,4 +263,43 @@ static inline int hrtimer_stop_sched_tic
 /* Bootup initialization: */
 extern void __init hrtimers_init(void);
 
+/*
+ * Timer-statistics info:
+ */
+#ifdef CONFIG_TIMER_STATS
+
+extern void tstats_update_stats(void *timer, pid_t pid, void *startf,
+				void *timerf, char * comm);
+
+static inline void tstats_account_hrtimer(struct hrtimer *timer)
+{
+	tstats_update_stats(timer, timer->start_pid, timer->start_site,
+			    timer->function, timer->start_comm);
+}
+
+extern void __tstats_hrtimer_set_start_info(struct hrtimer *timer, void *addr);
+
+static inline void tstats_hrtimer_set_start_info(struct hrtimer *timer)
+{
+	__tstats_hrtimer_set_start_info(timer, __builtin_return_address(0));
+}
+
+static inline void tstats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+	timer->start_site = NULL;
+}
+#else
+static inline void tstats_account_hrtimer(struct hrtimer *timer)
+{
+}
+
+static inline void tstats_hrtimer_set_start_info(struct hrtimer *timer)
+{
+}
+
+static inline void tstats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+}
+#endif
+
 #endif
Index: linux-2.6.18-mm2/include/linux/timer.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/timer.h	2006-09-30 01:41:19.000000000 +0200
+++ linux-2.6.18-mm2/include/linux/timer.h	2006-09-30 01:41:20.000000000 +0200
@@ -2,6 +2,7 @@
 #define _LINUX_TIMER_H
 
 #include <linux/list.h>
+#include <linux/ktime.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 
@@ -15,6 +16,11 @@ struct timer_list {
 	unsigned long data;
 
 	struct tvec_t_base_s *base;
+#ifdef CONFIG_TIMER_STATS
+	void *start_site;
+	char start_comm[16];
+	int start_pid;
+#endif
 };
 
 extern struct tvec_t_base_s boot_tvec_bases;
@@ -74,6 +80,48 @@ extern unsigned long next_timer_interrup
  */
 extern unsigned long get_next_timer_interrupt(unsigned long now);
 
+/*
+ * Timer-statistics info:
+ */
+#ifdef CONFIG_TIMER_STATS
+
+extern void tstats_update_stats(void *timer, pid_t pid, void *startf,
+				void *timerf, char * comm);
+
+static inline void tstats_account_timer(struct timer_list *timer)
+{
+	tstats_update_stats(timer, timer->start_pid, timer->start_site,
+			    timer->function, timer->start_comm);
+}
+
+extern void __tstats_timer_set_start_info(struct timer_list *timer, void *addr);
+
+static inline void tstats_timer_set_start_info(struct timer_list *timer)
+{
+	__tstats_timer_set_start_info(timer, __builtin_return_address(0));
+}
+
+static inline void tstats_timer_clear_start_info(struct timer_list *timer)
+{
+	timer->start_site = NULL;
+}
+#else
+static inline void tstats_account_timer(struct timer_list *timer)
+{
+}
+
+static inline void tstats_timer_set_start_info(struct timer_list *timer)
+{
+}
+
+static inline void tstats_timer_clear_start_info(struct timer_list *timer)
+{
+}
+#endif
+
+extern void delayed_work_timer_fn(unsigned long __data);
+
+
 /***
  * add_timer - start a timer
  * @timer: the timer to be added
Index: linux-2.6.18-mm2/kernel/hrtimer.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/hrtimer.c	2006-09-30 01:41:18.000000000 +0200
+++ linux-2.6.18-mm2/kernel/hrtimer.c	2006-09-30 01:41:20.000000000 +0200
@@ -814,6 +814,18 @@ static inline void hrtimer_resume_jiffie
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
+#ifdef CONFIG_TIMER_STATS
+void __tstats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /*
  * Timekeeping resumed notification
  */
@@ -959,6 +971,7 @@ remove_hrtimer(struct hrtimer *timer, st
 		 * Remove the timer and force reprogramming when high
 		 * resolution mode is active
 		 */
+		tstats_hrtimer_clear_start_info(timer);
 		__remove_hrtimer(timer, base, HRTIMER_INACTIVE, 1);
 		return 1;
 	}
@@ -1005,6 +1018,8 @@ hrtimer_start(struct hrtimer *timer, kti
 	}
 	timer->expires = tim;
 
+	tstats_hrtimer_set_start_info(timer);
+
 	enqueue_hrtimer(timer, new_base);
 
 	unlock_hrtimer_base(timer, &flags);
@@ -1146,6 +1161,12 @@ void hrtimer_init(struct hrtimer *timer,
 
 	timer->base = &cpu_base->clock_base[clock_id];
 	hrtimer_init_timer_hres(timer);
+
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
@@ -1229,6 +1250,7 @@ void hrtimer_interrupt(struct pt_regs *r
 			}
 
 			__remove_hrtimer(timer, base, HRTIMER_CALLBACK, 0);
+			tstats_account_hrtimer(timer);
 
 			if (timer->function(timer) != HRTIMER_NORESTART) {
 				BUG_ON(timer->state != HRTIMER_CALLBACK);
@@ -1277,6 +1299,8 @@ static void run_hrtimer_softirq(struct s
 		timer = list_entry(cpu_base->cb_pending.next,
 				   struct hrtimer, cb_entry);
 
+		tstats_account_hrtimer(timer);
+
 		fn = timer->function;
 		__remove_hrtimer(timer, timer->base, HRTIMER_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);
@@ -1326,6 +1350,8 @@ static inline void run_hrtimer_queue(str
 		if (base->softirq_time.tv64 <= timer->expires.tv64)
 			break;
 
+		tstats_account_hrtimer(timer);
+
 		fn = timer->function;
 		__remove_hrtimer(timer, base, HRTIMER_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);
Index: linux-2.6.18-mm2/kernel/time/Makefile
===================================================================
--- linux-2.6.18-mm2.orig/kernel/time/Makefile	2006-09-30 01:41:17.000000000 +0200
+++ linux-2.6.18-mm2/kernel/time/Makefile	2006-09-30 01:41:20.000000000 +0200
@@ -1,3 +1,4 @@
 obj-y += ntp.o clocksource.o jiffies.o
 
-obj-$(CONFIG_GENERIC_TIME) += clockevents.o
+obj-$(CONFIG_GENERIC_TIME)	+= clockevents.o
+obj-$(CONFIG_TIMER_STATS)	+= timer_stats.o
Index: linux-2.6.18-mm2/kernel/time/timer_stats.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-mm2/kernel/time/timer_stats.c	2006-09-30 01:41:20.000000000 +0200
@@ -0,0 +1,227 @@
+/*
+ * kernel/time/timer_stats.c
+ *
+ * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006, Thomas Gleixner <[email protected]>
+ *
+ * Based on: timer_top.c
+ *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
+ *	Written by Daniel Petrini <[email protected]>
+ *
+ * Collect timer usage statistics.
+ *
+ * We export the addresses and counting of timer functions being called,
+ * the pid and cmdline from the owner process if applicable.
+ *
+ * Start/stop data collection:
+ * # echo 1[0] >/proc/tstats
+ *
+ * Display the collected information:
+ * # cat /proc/tstats
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+
+#include <asm/uaccess.h>
+
+static DEFINE_SPINLOCK(tstats_lock);
+static int tstats_status;
+static ktime_t tstats_time;
+
+enum tstats_stat {
+	TSTATS_INACTIVE,
+	TSTATS_ACTIVE,
+	TSTATS_READOUT,
+	TSTATS_RESET,
+};
+
+struct tstats_entry {
+	void			*timer;
+	void			*start_func;
+	void			*expire_func;
+	unsigned long		counter;
+	pid_t			pid;
+	char			comm[TASK_COMM_LEN + 1];
+};
+
+#define TSTATS_MAX_ENTRIES	1024
+
+static struct tstats_entry tstats[TSTATS_MAX_ENTRIES];
+
+void tstats_update_stats(void *timer, pid_t pid, void *startf,
+			 void *timerf, char * comm)
+{
+	struct tstats_entry *entry = tstats;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&tstats_lock, flags);
+	if (tstats_status != TSTATS_ACTIVE)
+		goto out_unlock;
+
+	for (i = 0; i < TSTATS_MAX_ENTRIES; i++, entry++) {
+		if (entry->timer == timer &&
+		    entry->start_func == startf &&
+		    entry->expire_func == timerf &&
+		    entry->pid == pid) {
+
+			entry->counter++;
+			break;
+		}
+		if (!entry->timer) {
+			entry->timer = timer;
+			entry->start_func = startf;
+			entry->expire_func = timerf;
+			entry->counter = 1;
+			entry->pid = pid;
+			memcpy(entry->comm, comm, TASK_COMM_LEN);
+			entry->comm[TASK_COMM_LEN] = 0;
+			break;
+		}
+	}
+
+ out_unlock:
+	spin_unlock_irqrestore(&tstats_lock, flags);
+}
+
+static void tstats_reset(void)
+{
+	memset(tstats, 0, sizeof(tstats));
+}
+
+static void print_name_offset(struct seq_file *m, unsigned long addr)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
+	if (sym_name)
+		seq_printf(m, "%s", sym_name);
+	else
+		seq_printf(m, "<%p>", (void *)addr);
+}
+
+static int tstats_show(struct seq_file *m, void *v)
+{
+	struct tstats_entry *entry = tstats;
+	struct timespec period;
+	unsigned long ms;
+	long events = 0;
+	int i;
+
+	spin_lock_irq(&tstats_lock);
+	switch(tstats_status) {
+	case TSTATS_ACTIVE:
+		tstats_time = ktime_sub(ktime_get(), tstats_time);
+	case TSTATS_INACTIVE:
+		tstats_status = TSTATS_READOUT;
+		break;
+	default:
+		spin_unlock_irq(&tstats_lock);
+		return -EBUSY;
+	}
+	spin_unlock_irq(&tstats_lock);
+
+	period = ktime_to_timespec(tstats_time);
+	ms = period.tv_nsec % 1000000;
+
+	seq_printf(m, "Timerstats sample period: %ld.%3ld s\n",
+		   period.tv_sec, ms);
+
+	for (i = 0; i < TSTATS_MAX_ENTRIES && entry->timer; i++, entry++) {
+		seq_printf(m, "%4lu, %5d %-16s ", entry->counter, entry->pid,
+			   entry->comm);
+
+		print_name_offset(m, (unsigned long)entry->start_func);
+		seq_puts(m, " (");
+		print_name_offset(m, (unsigned long)entry->expire_func);
+		seq_puts(m, ")\n");
+		events += entry->counter;
+	}
+
+	ms += period.tv_sec * 1000;
+	if (events && period.tv_sec)
+		seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
+			   events / period.tv_sec, events * 1000 / ms);
+	else
+		seq_printf(m, "%ld total events\n", events);
+
+	tstats_status = TSTATS_INACTIVE;
+	return 0;
+}
+
+static ssize_t tstats_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *offs)
+{
+	char ctl[2];
+
+	if (count != 2 || *offs)
+		return -EINVAL;
+
+	if (copy_from_user(ctl, buf, count))
+		return -EFAULT;
+
+	switch (ctl[0]) {
+	case '0':
+		spin_lock_irq(&tstats_lock);
+		if (tstats_status == TSTATS_ACTIVE) {
+			tstats_status = TSTATS_INACTIVE;
+			tstats_time = ktime_sub(ktime_get(), tstats_time);
+		}
+		spin_unlock_irq(&tstats_lock);
+		break;
+	case '1':
+		spin_lock_irq(&tstats_lock);
+		if (tstats_status == TSTATS_INACTIVE) {
+			tstats_status = TSTATS_RESET;
+			spin_unlock_irq(&tstats_lock);
+			tstats_reset();
+			tstats_time = ktime_get();
+			tstats_status = TSTATS_ACTIVE;
+		} else
+			spin_unlock_irq(&tstats_lock);
+		break;
+	default:
+		count = -EINVAL;
+	}
+
+	return count;
+}
+
+static int tstats_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, tstats_show, NULL);
+}
+
+static struct file_operations tstats_fops = {
+	.open		= tstats_open,
+	.read		= seq_read,
+	.write		= tstats_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init init_tstats(void)
+{
+	struct proc_dir_entry *pe = create_proc_entry("tstats", 0666, NULL);
+
+	if (!pe)
+		return -ENOMEM;
+
+	pe->proc_fops = &tstats_fops;
+
+	return 0;
+}
+module_init(init_tstats);
Index: linux-2.6.18-mm2/kernel/timer.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/timer.c	2006-09-30 01:41:19.000000000 +0200
+++ linux-2.6.18-mm2/kernel/timer.c	2006-09-30 01:41:20.000000000 +0200
@@ -34,6 +34,7 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/kallsyms.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -135,6 +136,18 @@ static void internal_add_timer(tvec_base
 	list_add_tail(&timer->entry, vec);
 }
 
+#ifdef CONFIG_TIMER_STATS
+void __tstats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /**
  * init_timer - initialize a timer.
  * @timer: the timer to be initialized
@@ -146,11 +159,16 @@ void fastcall init_timer(struct timer_li
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL(init_timer);
 
 static inline void detach_timer(struct timer_list *timer,
-					int clear_pending)
+				int clear_pending)
 {
 	struct list_head *entry = &timer->entry;
 
@@ -197,6 +215,7 @@ int __mod_timer(struct timer_list *timer
 	unsigned long flags;
 	int ret = 0;
 
+	tstats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 
 	base = lock_timer_base(timer, &flags);
@@ -247,6 +266,7 @@ void add_timer_on(struct timer_list *tim
 	tvec_base_t *base = per_cpu(tvec_bases, cpu);
   	unsigned long flags;
 
+	tstats_timer_set_start_info(timer);
   	BUG_ON(timer_pending(timer) || !timer->function);
 	spin_lock_irqsave(&base->lock, flags);
 	timer->base = base;
@@ -279,6 +299,7 @@ int mod_timer(struct timer_list *timer, 
 {
 	BUG_ON(!timer->function);
 
+	tstats_timer_set_start_info(timer);
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
@@ -309,6 +330,7 @@ int del_timer(struct timer_list *timer)
 	unsigned long flags;
 	int ret = 0;
 
+	tstats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
 		if (timer_pending(timer)) {
@@ -444,6 +466,8 @@ static inline void __run_timers(tvec_bas
  			fn = timer->function;
  			data = timer->data;
 
+			tstats_account_timer(timer);
+
 			set_running_timer(base, timer);
 			detach_timer(timer, 1);
 			spin_unlock_irq(&base->lock);
@@ -1114,7 +1138,8 @@ static void run_timer_softirq(struct sof
 {
 	tvec_base_t *base = __get_cpu_var(tvec_bases);
 
- 	hrtimer_run_queues();
+	hrtimer_run_queues();
+
 	if (time_after_eq(jiffies, base->timer_jiffies))
 		__run_timers(base);
 }
Index: linux-2.6.18-mm2/kernel/workqueue.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/workqueue.c	2006-09-30 01:41:09.000000000 +0200
+++ linux-2.6.18-mm2/kernel/workqueue.c	2006-09-30 01:41:20.000000000 +0200
@@ -119,12 +119,14 @@ int fastcall queue_work(struct workqueue
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
-static void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(unsigned long __data)
 {
 	struct work_struct *work = (struct work_struct *)__data;
 	struct workqueue_struct *wq = work->wq_data;
 	int cpu = smp_processor_id();
+	struct list_head *head;
 
+	head = &per_cpu_ptr(wq->cpu_wq, cpu)->more_work.task_list;
 	if (unlikely(is_single_threaded(wq)))
 		cpu = singlethread_cpu;
 
@@ -140,11 +142,12 @@ static void delayed_work_timer_fn(unsign
  * Returns non-zero if it was successfully added.
  */
 int fastcall queue_delayed_work(struct workqueue_struct *wq,
-			struct work_struct *work, unsigned long delay)
+				struct work_struct *work, unsigned long delay)
 {
 	int ret = 0;
 	struct timer_list *timer = &work->timer;
 
+	tstats_timer_set_start_info(&work->timer);
 	if (!test_and_set_bit(0, &work->pending)) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
@@ -469,6 +472,7 @@ EXPORT_SYMBOL(schedule_work);
  */
 int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
 {
+	tstats_timer_set_start_info(&work->timer);
 	return queue_delayed_work(keventd_wq, work, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
Index: linux-2.6.18-mm2/lib/Kconfig.debug
===================================================================
--- linux-2.6.18-mm2.orig/lib/Kconfig.debug	2006-09-30 01:41:09.000000000 +0200
+++ linux-2.6.18-mm2/lib/Kconfig.debug	2006-09-30 01:41:20.000000000 +0200
@@ -109,6 +109,17 @@ config SCHEDSTATS
 	  application, you can say N to avoid the very slight overhead
 	  this adds.
 
+config TIMER_STATS
+	bool "Collect kernel timers statistics"
+	depends on DEBUG_KERNEL && PROC_FS
+	help
+	  If you say Y here, additional code will be inserted into the
+	  timer routines to collect statistics about kernel timers being
+	  reprogrammed. The statistics can be read from /proc/tstats.
+	  The statistics collection is started by writing 1 to /proc/tstats,
+	  writing 0 stops it. This feature is useful to collect information
+	  about timer usage patterns in kernel and userspace.
+
 config DEBUG_SLAB
 	bool "Debug slab memory allocations"
 	depends on DEBUG_KERNEL && SLAB

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux