Re: [PATCH] Documentation update sched-stat.txt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jul 20, 2007 at 09:56:03AM +0200, Joachim Deguara wrote:
> On Friday 20 July 2007 09:25:22 Nick Piggin wrote:
> > On Wed, Jul 18, 2007 at 11:11:30AM +0200, Joachim Deguara wrote:
> > > While learning about schedstats I found that the documentation in the
> > > tree is old.  I updated it and found some interesting stuff like
> > > > schedstats version 14 is the same as version 12 and version 13 never saw a
> > > kernel release!  Also there are 6 fields in the current schedstats that
> > > are not used anymore.  Nick had made them irrelevant in commit
> > > 476d139c218e44e045e4bc6d4cc02b010b343939 but never removed them.
> > >
> > > Thanks to Rick's perl script who I borrowed some of the updated
> > > descriptions from.
> >
> > Ah, thanks, I actually didn't realise there was such good documentation
> > there. Patch looks good.
> >
> > BTW. I have a simple program to do a basic statistical summary of the
> > multiprocessor balancing if you are interested and haven't seen it.
> 
> Yes I am interested.  Actually I started down this road looking to find out if 
> task migration could be tracked and I saw that got kicked out from early 
> versions.

What do you mean by that? You mean if you can check information on the
migration events that a particular task has experienced?


> Your script could come in useful to link to in the documentation.  Rick has a 
> great page but hasn't been updated in a little while (though still up-to-date 
> as version 12==14) and his email bounced (though just a config error).

Here it is, it's a bit ugly and I think it may still have a bug somewhere,
but I haven't looked at it for a while.

---
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* The only file format version parse_file() accepts; any other value in the
 * "version" line makes it exit with an error. */
#define SCHEDSTAT_VERSION 14
struct rq_stats {
	/* sys_sched_yield stats */
	unsigned long long yld_both_empty;
	unsigned long long yld_act_empty;
	unsigned long long yld_exp_empty;
	unsigned long long yld_cnt;
										
	/* schedule stats */
	unsigned long long sched_active; //new
	unsigned long long sched_switch;
	unsigned long long sched_cnt;
	unsigned long long sched_idle;

	/* wake stats */
	unsigned long long ttwu_cnt;
	unsigned long long ttwu_local;

	/* latency stats */
	unsigned long long cpu_time;
	unsigned long long delay_time;
	unsigned long long pcnt;
};

/*
 * Per-domain load balancing counters, filled by parse_file() from the numbers
 * that follow a "domainN <mask>" tag.
 *
 * The [3] arrays hold one slot per balancing type; show_stats() labels index 0
 * "idle", 1 "busy" and 2 "new-idle".  parse_file() reads the eight array
 * counters for each slot in the order cnt, balanced, failed, imbalance,
 * pulled, hot_pulled, nobusyq, nobusyg, which differs from the declaration
 * order below.
 *
 * Every member must remain an unsigned long long: find_stats_delta() walks
 * this struct as a flat array of that type.
 */
struct domain_stats {
	unsigned long long lb_cnt[3];
	unsigned long long lb_balanced[3];
	unsigned long long lb_failed[3];
	unsigned long long lb_pulled[3];
	unsigned long long lb_hot_pulled[3];
	unsigned long long lb_imbalance[3];
	unsigned long long lb_nobusyq[3];
	unsigned long long lb_nobusyg[3];
										
	/* Active load balancing */
	unsigned long long alb_cnt;
	unsigned long long alb_failed;
	unsigned long long alb_pushed;
	
	/* Wake ups */
	unsigned long long ttwu_wake_remote;

	/* Passive load balancing */
	unsigned long long ttwu_move_balance;
										
	/* Affine wakeups */
	unsigned long long ttwu_move_affine;
										
	/* SD_BALANCE_EXEC */
	unsigned long long sbe_cnt;
	unsigned long long sbe_balanced;
	unsigned long long sbe_pushed;

	/* SD_BALANCE_FORK */
	unsigned long long sbf_cnt;
	unsigned long long sbf_balanced;
	unsigned long long sbf_pushed;
};

/*
 * Names for the three balancing types.  Nothing in this file refers to the
 * enumerators; presumably they correspond to the three slots of the lb_*[3]
 * arrays in struct domain_stats (verify against the kernel's ordering).
 */
enum idle_type {
	IDLE = 0,
	NOT_IDLE = 1,
	NEWLY_IDLE = 2
};

/* Array capacity: parse_file() reads at most this many domains per CPU. */
#define MAXDOMAINS	4
/* Array capacity: parse_file() reads at most this many cpuN entries. */
#define MAXCPUS		32
/* show_stats() divides the timestamp delta by this to obtain seconds.
 * NOTE(review): assumes the timestamps are ticks of a 100 Hz clock -- confirm
 * against the kernel that produced the files. */
#define HZ		100UL

/*
 * Parse one schedstat snapshot from @file.
 *
 * Expected layout: a "version N" line (N must equal SCHEDSTAT_VERSION), a
 * "timestamp T" line, then for each CPU a "cpuI" tag followed by 12 counters
 * and zero or more "domainJ <mask>" tags, each followed by 3 * 8 load balance
 * counters and 12 further counters.
 *
 * @ts:           receives the timestamp value.
 * @cpus:         receives the number of CPUs parsed (at most MAXCPUS).
 * @domains:      receives the number of domains per CPU (at most MAXDOMAINS);
 *                every CPU must have the same number.
 * @rq_stats:     filled per CPU; the sched_active member is left untouched.
 * @domain_stats: filled per CPU and domain.
 *
 * Any format violation prints a message to stderr and exits with status 1.
 * Each row must now supply every expected counter; a truncated row is
 * rejected instead of being accepted with stale values in the missing fields.
 */
static void parse_file(FILE *file, unsigned long long *ts,
		int *cpus, int *domains,
		struct rq_stats rq_stats[MAXCPUS],
		struct domain_stats domain_stats[MAXCPUS][MAXDOMAINS])
{
	int i, j;
	int ret, cpu, domain;
	int version;

	*domains = -1;

	ret = fscanf(file, "version %d\n", &version);
	if (ret != 1) {
		fprintf(stderr, "file format error 0\n");
		exit(1);
	}
	if (version != SCHEDSTAT_VERSION) {
		fprintf(stderr, "wrong file format version\n");
		exit(1);
	}

	ret = fscanf(file, "timestamp %llu\n", ts);
	if (ret != 1) {
		fprintf(stderr, "file format error 1\n");
		exit(1);
	}

	for (i = 0; i < MAXCPUS; i++) {
		struct rq_stats *rs = &rq_stats[i];

		/* End of file here is the normal way out of the loop. */
		ret = fscanf(file, "cpu%d ", &cpu);
		if (ret == EOF)
			break;
		if (ret != 1 || cpu != i) {
			fprintf(stderr, "file format error 2\n");
			exit(1);
		}

		ret = fscanf(file, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
			&rs->yld_both_empty, &rs->yld_act_empty,
			&rs->yld_exp_empty, &rs->yld_cnt,
			&rs->sched_switch, &rs->sched_cnt, &rs->sched_idle,
			&rs->ttwu_cnt, &rs->ttwu_local,
			&rs->cpu_time, &rs->delay_time, &rs->pcnt);
		if (ret != 12) {
			fprintf(stderr, "file format error 3\n");
			exit(1);
		}

		for (j = 0; j < MAXDOMAINS; j++) {
			int k;
			struct domain_stats *ds = &domain_stats[i][j];

			/*
			 * We discard the domain's cpumask for now.  Anything
			 * other than a domain tag (next cpu line, end of file)
			 * ends this CPU's domain list.
			 */
			ret = fscanf(file, " domain%d %*s", &domain);
			if (ret != 1)
				break;
			if (domain != j) {
				fprintf(stderr, "file format error 4\n");
				exit(1);
			}

			/* One group of eight counters per balancing type. */
			for (k = 0; k < 3; k++) {
				ret = fscanf(file, "%llu %llu %llu %llu %llu %llu %llu %llu",
					&ds->lb_cnt[k], &ds->lb_balanced[k],
					&ds->lb_failed[k], &ds->lb_imbalance[k],
					&ds->lb_pulled[k], &ds->lb_hot_pulled[k],
					&ds->lb_nobusyq[k], &ds->lb_nobusyg[k]);
				if (ret != 8) {
					fprintf(stderr, "file format error 5\n");
					exit(1);
				}
			}

			ret = fscanf(file, " %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
				&ds->alb_cnt, &ds->alb_failed, &ds->alb_pushed,
				&ds->sbe_cnt, &ds->sbe_balanced, &ds->sbe_pushed,
				&ds->sbf_cnt, &ds->sbf_balanced, &ds->sbf_pushed,
				&ds->ttwu_wake_remote, &ds->ttwu_move_affine,
				&ds->ttwu_move_balance);
			if (ret != 12) {
				fprintf(stderr, "file format error 6\n");
				exit(1);
			}
		}

		if (*domains != -1 && *domains != j) {
			fprintf(stderr, "domains mismatch within file\n");
			exit(1);
		}
		*domains = j;

		/*
		 * Skip trailing white space only.  The result is deliberately
		 * not used to leave the loop: leaving here would skip the i++
		 * for the CPU just parsed and make *cpus one too small.  The
		 * next iteration's "cpu%d" read reports end of file instead.
		 */
		(void)fscanf(file, "\n");
	}

	*cpus = i;
}

/*
 * Sum, over all MAXCPUS slots, the difference (post - pre) of every counter.
 *
 * @rq_delta receives one struct holding the summed runqueue counter deltas;
 * @domain_delta receives one struct per domain index holding the summed
 * domain counter deltas.  Both outputs are zeroed first.
 *
 * The structs are treated as plain arrays of unsigned long long, which works
 * because every member of both structs has that type.
 */
static void find_stats_delta(struct rq_stats rq_pre[MAXCPUS],
			struct rq_stats rq_post[MAXCPUS],
			struct rq_stats *rq_delta,
			struct domain_stats domain_pre[MAXCPUS][MAXDOMAINS],
			struct domain_stats domain_post[MAXCPUS][MAXDOMAINS],
			struct domain_stats domain_delta[MAXDOMAINS])
{
	const unsigned int rq_words =
		sizeof(struct rq_stats) / sizeof(unsigned long long);
	const unsigned int dom_words =
		sizeof(struct domain_stats) / sizeof(unsigned long long);
	unsigned long long *rq_sum = (unsigned long long *)rq_delta;
	int cpu;

	memset(rq_delta, 0, sizeof(*rq_delta));
	memset(domain_delta, 0, MAXDOMAINS * sizeof(domain_delta[0]));

	for (cpu = 0; cpu < MAXCPUS; cpu++) {
		const unsigned long long *before =
			(const unsigned long long *)&rq_pre[cpu];
		const unsigned long long *after =
			(const unsigned long long *)&rq_post[cpu];
		unsigned int w;
		int dom;

		for (w = 0; w < rq_words; w++)
			rq_sum[w] += after[w] - before[w];

		for (dom = 0; dom < MAXDOMAINS; dom++) {
			unsigned long long *dom_sum =
				(unsigned long long *)&domain_delta[dom];

			before = (const unsigned long long *)&domain_pre[cpu][dom];
			after = (const unsigned long long *)&domain_post[cpu][dom];

			for (w = 0; w < dom_words; w++)
				dom_sum[w] += after[w] - before[w];
		}
	}
}

static void show_stats(unsigned long long time_delta, int cpus, int domains,
		struct rq_stats *rq_stats,
		struct domain_stats domain_stats[MAXDOMAINS])
{
	unsigned long long ttwu_remote;
	double s, tmp;
	int i;

	/* Ensures we don't get 0 time delta */
	s = ((double)0.5 + time_delta) / HZ;
	printf("sample period: %.3fs\n", s);
	
	/* TODO add the runqueue stats */
	tmp = (double)rq_stats->sched_cnt / s;
	printf("%.3f calls to schedule / s\n", tmp);

	tmp = (double)rq_stats->cpu_time / rq_stats->pcnt;
	printf("%.3fms average timeslice\n", tmp);

	tmp = (double)rq_stats->delay_time / rq_stats->pcnt;
	printf("%.3fms average runqueue delay\n", tmp);

	printf("\n--- wakeup statistics ---\n");
	tmp = (double)rq_stats->ttwu_cnt / s;
	printf("  %.3f task wakes / s\n", tmp);
	tmp = (double)100 * rq_stats->ttwu_local / rq_stats->ttwu_cnt;
	printf("    %.3f%% of them from the local CPU\n", tmp);

	ttwu_remote = rq_stats->ttwu_cnt - rq_stats->ttwu_local;

	for (i = 0; i < domains; i++) {
		tmp = (double)100 * domain_stats[i].ttwu_wake_remote / ttwu_remote;
		printf("    %.3f%% of remote wakeups come from domain%d\n", tmp, i);

		tmp = (double)100 * domain_stats[i].ttwu_move_balance / domain_stats[i].ttwu_wake_remote;
		printf("      %.3f%% are moved to the local CPU via passive load balancing\n", tmp);

		tmp = (double)100 * domain_stats[i].ttwu_move_affine / domain_stats[i].ttwu_wake_remote;
		printf("      %.3f%% are moved to the local CPU via affine wakeups\n", tmp);
	}

	printf("\n--- load balancing statistics ---\n");

	for (i = 0; i < domains; i++) {
		unsigned long long total_lb = 0;
		unsigned long long total_pulled = 0;
		int j;

		printf("  for domain%d\n", i);

		for (j = 0; j < 3; j++) {
			total_lb += domain_stats[i].lb_cnt[j];
			total_pulled += domain_stats[i].lb_pulled[j];
		}

		tmp = (double)total_lb / s;
		printf("    %.3f load balance calls / s", tmp);
		tmp = (double)total_pulled / s;
		printf(" move %.3f tasks / s\n", tmp);

		for (j = 0; j < 3; j++) {
			unsigned long long lb = domain_stats[i].lb_cnt[j];
			unsigned long long pulled = domain_stats[i].lb_pulled[j];
			tmp = (double)100 * lb / total_lb;
			printf("      %.3f%% calls and", tmp);
			tmp = (double)100 * pulled / total_pulled;
			printf(" %.3f%% task moves came from ", tmp);
			if (j == 0)
				printf("idle balancing\n");
			else if (j == 1)
				printf("busy balancing\n");
			else if (j == 2)
				printf("new-idle balancing\n");

			if (lb) {
				tmp = (double)100 * (lb - domain_stats[i].lb_balanced[j]) / lb;
				printf("        %.3f%% were imbalanced", tmp);

				tmp = (double)domain_stats[i].lb_imbalance[j] / (lb - domain_stats[i].lb_balanced[j]);
				printf(" with an average imbalance of %.3f\n", tmp);

				tmp = (double)100 * domain_stats[i].lb_failed[j] / lb;
				printf("        %.3f%% found an imbalance but failed\n", tmp);
			}

			if (pulled) {
				tmp = (double)100 * domain_stats[i].lb_hot_pulled[j] / pulled;
				printf("        %.3f%% of tasks moved were cache hot\n", tmp);
			}
		}
		
		tmp = (double)domain_stats[i].alb_cnt / s;
		printf("    %.3f active balances / s ", tmp);
		
		tmp = (double)domain_stats[i].alb_pushed / s;
		printf(" move %.3f tasks / s\n", tmp);

		if (domain_stats[i].alb_cnt) {
			tmp = (double)100 * domain_stats[i].alb_failed / domain_stats[i].alb_cnt;
			printf("      %%%.3f attempts failed\n", tmp);
		}

		tmp = (double)domain_stats[i].sbe_cnt / s;
		printf("    %.3f exec balances / s ", tmp);
		
		tmp = (double)domain_stats[i].sbe_pushed / s;
		printf(" move %.3f tasks / s\n", tmp);

		if (domain_stats[i].sbe_cnt) {
			tmp = (double)100 * domain_stats[i].sbe_balanced / domain_stats[i].sbe_cnt;
			printf("      %%%.3f found no imbalance\n", tmp);
		}

		tmp = (double)domain_stats[i].sbf_cnt / s;
		printf("    %.3f fork balances / s ", tmp);
		
		tmp = (double)domain_stats[i].sbf_pushed / s;
		printf(" move %.3f tasks / s\n", tmp);

		if (domain_stats[i].sbf_cnt) {
			tmp = (double)100 * domain_stats[i].sbf_balanced / domain_stats[i].sbf_cnt;
			printf("      %%%.3f found no imbalance\n", tmp);
		}


		printf("\n");
	}
}

/*
 * State for the two snapshots ("pre" and "post") and their difference.
 *
 * These objects have static storage duration and therefore start out
 * zero-filled.  find_stats_delta() relies on that: it sums over all MAXCPUS
 * and MAXDOMAINS slots, so slots parse_file() never wrote must contribute 0.
 */
static unsigned long long pre_ts, post_ts;
static int pre_cpus, post_cpus;
static int pre_domains, post_domains;
static struct rq_stats pre_rq_stats[MAXCPUS];
static struct rq_stats post_rq_stats[MAXCPUS];
static struct rq_stats delta_rq_stats;
static struct domain_stats pre_domain_stats[MAXCPUS][MAXDOMAINS];
static struct domain_stats post_domain_stats[MAXCPUS][MAXDOMAINS];
static struct domain_stats delta_domain_stats[MAXDOMAINS];

/*
 * Usage: prog <stats before> <stats after>
 *
 * Parses the two schedstat snapshots, checks that they describe the same
 * number of CPUs and domains, and prints a summary of what changed between
 * them.  Exits with status 1 on a usage, open or format error.
 */
int main(int argc, char *argv[])
{
	FILE *pre, *post;

	/* Both file names are required: argv[2] is opened below. */
	if (argc < 3) {
		fprintf(stderr, "Usage: %s <stats before> <stats after>\n",
				argv[0]);
		exit(1);
	}

	pre = fopen(argv[1], "r");
	if (pre == NULL) {
		perror("fopen pre file");
		exit(1);
	}
	post = fopen(argv[2], "r");
	if (post == NULL) {
		perror("fopen post file");
		exit(1);
	}

	parse_file(pre, &pre_ts, &pre_cpus, &pre_domains,
			pre_rq_stats, pre_domain_stats);
	parse_file(post, &post_ts, &post_cpus, &post_domains,
			post_rq_stats, post_domain_stats);

	/* Everything has been read; the streams are no longer needed. */
	fclose(pre);
	fclose(post);

	if (pre_cpus != post_cpus || pre_domains != post_domains) {
		fprintf(stderr, "pre and post file formats mismatch\n");
		exit(1);
	}

	find_stats_delta(pre_rq_stats, post_rq_stats, &delta_rq_stats,
		pre_domain_stats, post_domain_stats, delta_domain_stats);

	show_stats(post_ts - pre_ts, pre_cpus, pre_domains,
			&delta_rq_stats, delta_domain_stats);

	return 0;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux