[RFC] [Patch 4/4] timer_stats slimmed down: using statistics infrastucture

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch implements timer_stats based on the statistics
infrastructure.

Here is some sample output, which even somewhat
resembles /proc/timer_stats.
(>80 char lines because of symbol names, sorry for line breaks).


It reads:
<statistic name> <basket> <hits> <label>

with label being:
<pid> <task name> <start_fn>(<stop_fn>)


[[email protected] timer_stats]# cat data
expired missed 0x0
expired - 5381   1019 multipathd       do_nanosleep (hrtimer_wakeup)
expired - 2707      1 swapper          queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707      1 swapper          queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707      1 swapper          queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707      1 swapper          queue_delayed_work_on
(delayed_work_timer_fn)
expired - 1353      1 swapper          neigh_table_init_no_netlink
(neigh_periodic_timer)
expired - 1083      0 swapper          page_writeback_init (wb_timer_fn)
expired - 1083   1021 multipathd       schedule_timeout
(process_timeout)
expired - 1081      1 init             schedule_timeout
(process_timeout)
expired - 677       1 swapper          neigh_table_init_no_netlink
(neigh_periodic_timer)
expired - 278       0 swapper          sk_reset_timer (tcp_delack_timer)
expired - 193       1 swapper          nfsd_export_init
(delayed_work_timer_fn)
expired - 181     975 syslogd          do_setitimer (it_real_fn)
expired - 104    1296 sshd             sk_reset_timer (tcp_write_timer)
expired - 90        1 swapper          ip_rt_init (rt_check_expire)
expired - 90     1244 crond            do_nanosleep (hrtimer_wakeup)
expired - 54      896 ip               __netdev_watchdog_up
(dev_watchdog)
expired - 44        1 swapper          inet_initpeers
(peer_check_expire)
expired - 44        0 swapper          addrconf_verify (addrconf_verify)
.
.
.
expired - 1      1368 sadc             schedule_timeout
(process_timeout)
expired - 1      1370 sadc             blk_plug_device
(blk_unplug_timeout)
expired - 1      1370 sadc             schedule_timeout
(process_timeout)


# cat /debug/timer_stats/definition
name=expired state=on units=timer/occurrence type=sparse entries=1024
data=[6920660.417313] started=[6920660.417313] stopped=[    0.000000]


Signed-off-by: Martin Peschke <[email protected]>
---

 time/timer_stats.c |  385 ++++++++---------------------------------------------
 timer.c            |    2
 2 files changed, 59 insertions(+), 328 deletions(-)

Index: linux/kernel/time/timer_stats.c
===================================================================
--- linux.orig/kernel/time/timer_stats.c
+++ linux/kernel/time/timer_stats.c
@@ -26,197 +26,95 @@
  * the pid and cmdline from the owner process if applicable.
  *
  * Start/stop data collection:
- * # echo 1[0] >/proc/timer_stats
+ * # echo state=on[off] > /debug/timer_stats/definition
  *
  * Display the information collected so far:
- * # cat /proc/timer_stats
+ * # cat /debug/timer_stats/data
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-#include <linux/proc_fs.h>
 #include <linux/module.h>
-#include <linux/spinlock.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
+#include <linux/statistic.h>
 
 /*
  * This is our basic unit of interest: a timer expiry event identified
  * by the timer, its start/expire functions and the PID of the task that
  * started the timer. We count the number of times an event happens:
  */
-struct entry {
-	/*
-	 * Hash list:
-	 */
-	struct entry		*next;
-
-	/*
-	 * Hash keys:
-	 */
-	void			*timer;
-	void			*start_func;
-	void			*expire_func;
-	pid_t			pid;
-
-	/*
-	 * Number of timeout events:
-	 */
-	unsigned long		count;
-
+struct tstat_key {
+	void	*timer;
+	void	*start_func;
+	void	*expire_func;
+	pid_t	pid;
 	/*
 	 * We save the command-line string to preserve
 	 * this information past task exit:
 	 */
-	char			comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(spinlock_t, lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-static int __read_mostly active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS	10
-#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-static void reset_entries(void)
-{
-	nr_entries = 0;
-	memset(entries, 0, sizeof(entries));
-	atomic_set(&overflow_count, 0);
-}
+	char	comm[TASK_COMM_LEN + 1];
+};
 
-static struct entry *alloc_entry(void)
+static void tstat_print_symbol(struct seq_file *m, unsigned long addr)
 {
-	if (nr_entries >= MAX_ENTRIES)
-		return NULL;
-
-	return entries + nr_entries++;
-}
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry)						\
-	(((unsigned long)(entry)->timer       ^				\
-	  (unsigned long)(entry)->start_func  ^				\
-	  (unsigned long)(entry)->expire_func ^				\
-	  (unsigned long)(entry)->pid		) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
+	char symname[KSYM_NAME_LEN+1];
 
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
-	return entry1->timer       == entry2->timer	  &&
-	       entry1->start_func  == entry2->start_func  &&
-	       entry1->expire_func == entry2->expire_func &&
-	       entry1->pid	   == entry2->pid;
+	if (lookup_symbol_name(addr, symname) < 0)
+		seq_printf(m, "<%p>", (void *)addr);
+	else
+		seq_printf(m, "%s", symname);
 }
 
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
+static void tstat_label(struct statistic_interface *interface, int i,
+			void *_key, struct seq_file *m)
 {
-	struct entry **head, *curr, *prev;
+	struct tstat_key *key = _key;
 
-	head = tstat_hashentry(entry);
-	curr = *head;
+	seq_printf(m, "\t%5d %-16s ", key->pid, key->comm);
+	tstat_print_symbol(m, (unsigned long)key->start_func);
+	seq_puts(m, " (");
+	tstat_print_symbol(m, (unsigned long)key->expire_func);
+	seq_puts(m, ")");
+}
 
-	/*
-	 * The fastpath is when the entry is already hashed,
-	 * we do this with the lookup lock held, but with the
-	 * table lock not held:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			return curr;
+enum tstat_enum {
+	TSTAT_EXPIRED,
+	_TSTAT_NUMBER,
+};
 
-		curr = curr->next;
+static struct statistic_info tstat_info[] = {
+	[TSTAT_EXPIRED] = {
+		.name	  = "expired",
+		.x_unit	  = "timer",
+		.y_unit	  = "occurrence",
+		.defaults = "type=sparse entries=1024",
+		.flags	  = STATISTIC_FLAGS_KEY,
+		.key_size = sizeof(struct tstat_key)
 	}
-	/*
-	 * Slowpath: allocate, set up and link a new hash entry:
-	 */
-	prev = NULL;
-	curr = *head;
-
-	spin_lock(&table_lock);
-	/*
-	 * Make sure we have not raced with another CPU:
-	 */
-	while (curr) {
-		if (match_entries(curr, entry))
-			goto out_unlock;
+};
 
-		prev = curr;
-		curr = curr->next;
-	}
+struct statistic tstat_stat[_TSTAT_NUMBER];
 
-	curr = alloc_entry();
-	if (curr) {
-		*curr = *entry;
-		curr->count = 0;
-		memcpy(curr->comm, comm, TASK_COMM_LEN);
-		if (prev)
-			prev->next = curr;
-		else
-			*head = curr;
-		curr->next = NULL;
-	}
- out_unlock:
-	spin_unlock(&table_lock);
+struct statistic_interface tstat_interface = {
+	.stat	= tstat_stat,
+	.info	= tstat_info,
+	.number	= _TSTAT_NUMBER,
+	.label	= tstat_label,
+};
 
-	return curr;
+static int __init tstat_init(void)
+{
+	statistic_create(&tstat_interface, "timer_stats");
+	return 0;
 }
+__initcall(tstat_init);
 
 /**
- * timer_stats_update_stats - Update the statistics for a timer.
+ * tstat_inc - Update the statistics for a timer.
  * @timer:	pointer to either a timer_list or a hrtimer
  * @pid:	the pid of the task which set up the timer
  * @startf:	pointer to the function which did the timer setup
@@ -229,179 +127,14 @@ static struct entry *tstat_lookup(struct
 void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 			      void *timerf, char * comm)
 {
-	/*
-	 * It doesnt matter which lock we take:
-	 */
-	spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
-	struct entry *entry, input;
-	unsigned long flags;
-
-	input.timer = timer;
-	input.start_func = startf;
-	input.expire_func = timerf;
-	input.pid = pid;
-
-	spin_lock_irqsave(lock, flags);
-	if (!active)
-		goto out_unlock;
-
-	entry = tstat_lookup(&input, comm);
-	if (likely(entry))
-		entry->count++;
-	else
-		atomic_inc(&overflow_count);
-
- out_unlock:
-	spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
-	char symname[KSYM_NAME_LEN+1];
-
-	if (lookup_symbol_name(addr, symname) < 0)
-		seq_printf(m, "<%p>", (void *)addr);
-	else
-		seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
-	struct timespec period;
-	struct entry *entry;
-	unsigned long ms;
-	long events = 0;
-	ktime_t time;
-	int i;
-
-	mutex_lock(&show_mutex);
-	/*
-	 * If still active then calculate up to now:
-	 */
-	if (active)
-		time_stop = ktime_get();
-
-	time = ktime_sub(time_stop, time_start);
-
-	period = ktime_to_timespec(time);
-	ms = period.tv_nsec / 1000000;
-
-	seq_puts(m, "Timer Stats Version: v0.1\n");
-	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
-	if (atomic_read(&overflow_count))
-		seq_printf(m, "Overflow: %d entries\n",
-			atomic_read(&overflow_count));
-
-	for (i = 0; i < nr_entries; i++) {
-		entry = entries + i;
-		seq_printf(m, "%4lu, %5d %-16s ",
-				entry->count, entry->pid, entry->comm);
-
-		print_name_offset(m, (unsigned long)entry->start_func);
-		seq_puts(m, " (");
-		print_name_offset(m, (unsigned long)entry->expire_func);
-		seq_puts(m, ")\n");
-
-		events += entry->count;
-	}
-
-	ms += period.tv_sec * 1000;
-	if (!ms)
-		ms = 1;
-
-	if (events && period.tv_sec)
-		seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
-			   events / period.tv_sec, events * 1000 / ms);
-	else
-		seq_printf(m, "%ld total events\n", events);
-
-	mutex_unlock(&show_mutex);
-
-	return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
-	unsigned long flags;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
-		/* nothing */
-		spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
-	}
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *offs)
-{
-	char ctl[2];
+	struct tstat_key key;
 
-	if (count != 2 || *offs)
-		return -EINVAL;
+	memset(&key, 0, sizeof(struct tstat_key)); /* FIXME: get rid of it? */
+	key.timer = timer;
+	key.start_func = startf;
+	key.expire_func = timerf;
+	key.pid = pid;
+	memcpy(key.comm, comm, TASK_COMM_LEN);
 
-	if (copy_from_user(ctl, buf, count))
-		return -EFAULT;
-
-	mutex_lock(&show_mutex);
-	switch (ctl[0]) {
-	case '0':
-		if (active) {
-			active = 0;
-			time_stop = ktime_get();
-			sync_access();
-		}
-		break;
-	case '1':
-		if (!active) {
-			reset_entries();
-			time_start = ktime_get();
-			active = 1;
-		}
-		break;
-	default:
-		count = -EINVAL;
-	}
-	mutex_unlock(&show_mutex);
-
-	return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, tstats_show, NULL);
-}
-
-static struct file_operations tstats_fops = {
-	.open		= tstats_open,
-	.read		= seq_read,
-	.write		= tstats_write,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-void __init init_timer_stats(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		spin_lock_init(&per_cpu(lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
-	struct proc_dir_entry *pe;
-
-	pe = create_proc_entry("timer_stats", 0644, NULL);
-	if (!pe)
-		return -ENOMEM;
-
-	pe->proc_fops = &tstats_fops;
-
-	return 0;
+	statistic_inc_as_key(STAT_SPARSE, tstat_stat, TSTAT_EXPIRED, &key);
 }
-__initcall(init_tstats_procfs);
Index: linux/kernel/timer.c
===================================================================
--- linux.orig/kernel/timer.c
+++ linux/kernel/timer.c
@@ -1315,8 +1315,6 @@ void __init init_timers(void)
 	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
 				(void *)(long)smp_processor_id());
 
-	init_timer_stats();
-
 	BUG_ON(err == NOTIFY_BAD);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux