[Patch][RFC] Disabling per-tgid stats on task exit in taskstats

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jay, Chris, could you check whether this patch does what you need? It's tested and runs fine for me. A quick response would be appreciated so that it can be included in -mm before the 2.6.18 merge window begins.

I decided against adding the configuration to the taskstats interface directly (as another command) since the sysfs solution
is much simpler and the configuration operation is infrequent.

Balbir, all, comments welcome.

--Shailabh


Selective sending of per-tgid statistics in taskstats interface

The taskstats interface currently sends both per-pid and per-tgid stats
whenever a thread exits and its thread group is non-empty. Some potential
users of taskstats, currently SGI's CSA, do not need the per-tgid stats.

Hence, this patch introduces a configuration parameter
	/sys/kernel/taskstats_tgid_exit
through which a privileged user can turn on/off sending of per-tgid stats on
task exit. The default is on. Regardless of the parameter, explicit commands
requesting per-tgid stats are always satisfied.

--

Signed-Off-By: Shailabh Nagar <[email protected]>


Documentation/accounting/taskstats.txt |   42 ++++++++++++++++++++++++---------
include/linux/taskstats_kern.h         |   14 +++--------
kernel/ksysfs.c                        |    9 +++++++
kernel/taskstats.c                     |   26 ++++++++++++++++++++
4 files changed, 70 insertions(+), 21 deletions(-)

Index: linux-2.6.17-rc5-mm3/include/linux/taskstats_kern.h
===================================================================
--- linux-2.6.17-rc5-mm3.orig/include/linux/taskstats_kern.h	2006-06-09 02:02:31.000000000 -0400
+++ linux-2.6.17-rc5-mm3/include/linux/taskstats_kern.h	2006-06-09 02:04:42.000000000 -0400
@@ -18,13 +18,6 @@ enum {
#ifdef CONFIG_TASKSTATS
extern kmem_cache_t *taskstats_cache;

-static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
-					struct taskstats **ptgidstats)
-{
-	*ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-	*ptgidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-}
-
static inline void taskstats_exit_free(struct taskstats *tidstats,
					struct taskstats *tgidstats)
{
@@ -34,17 +27,18 @@ static inline void taskstats_exit_free(s
		kmem_cache_free(taskstats_cache, tgidstats);
}

+extern void taskstats_exit_alloc(struct taskstats **, struct taskstats **);
extern void taskstats_exit_send(struct task_struct *, struct taskstats *,
				struct taskstats *);
extern void taskstats_init_early(void);

#else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
-					struct taskstats **ptgidstats)
-{}
static inline void taskstats_exit_free(struct taskstats *ptidstats,
					struct taskstats *ptgidstats)
{}
+static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
+					struct taskstats **ptgidstats)
+{}
static inline void taskstats_exit_send(struct task_struct *tsk,
					struct taskstats *tidstats,
					struct taskstats *tgidstats)
Index: linux-2.6.17-rc5-mm3/kernel/ksysfs.c
===================================================================
--- linux-2.6.17-rc5-mm3.orig/kernel/ksysfs.c	2006-06-09 02:02:31.000000000 -0400
+++ linux-2.6.17-rc5-mm3/kernel/ksysfs.c	2006-06-09 02:04:42.000000000 -0400
@@ -63,6 +63,12 @@ static ssize_t kexec_crash_loaded_show(s
KERNEL_ATTR_RO(kexec_crash_loaded);
#endif /* CONFIG_KEXEC */

+#ifdef CONFIG_TASKSTATS
+extern ssize_t taskstats_tgid_exit_show(struct subsystem *subsys, char *page);
+extern ssize_t taskstats_tgid_exit_store(struct subsystem *subsys, const char *page, size_t count);
+KERNEL_ATTR_RW(taskstats_tgid_exit);
+#endif
+
decl_subsys(kernel, NULL, NULL);
EXPORT_SYMBOL_GPL(kernel_subsys);

@@ -75,6 +81,9 @@ static struct attribute * kernel_attrs[]
	&kexec_loaded_attr.attr,
	&kexec_crash_loaded_attr.attr,
#endif
+#ifdef CONFIG_TASKSTATS
+	&taskstats_tgid_exit_attr.attr,
+#endif
	NULL
};

Index: linux-2.6.17-rc5-mm3/kernel/taskstats.c
===================================================================
--- linux-2.6.17-rc5-mm3.orig/kernel/taskstats.c	2006-06-09 02:02:31.000000000 -0400
+++ linux-2.6.17-rc5-mm3/kernel/taskstats.c	2006-06-09 02:04:42.000000000 -0400
@@ -24,6 +24,7 @@

static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
static int family_registered = 0;
+static int tgid_exit_send = 1;   /* Should tgid stats be sent on exit */
kmem_cache_t *taskstats_cache;
static DEFINE_MUTEX(taskstats_exit_mutex);

@@ -229,6 +230,15 @@ err:
	return rc;
}

+void taskstats_exit_alloc(struct taskstats **ptidstats,
+					struct taskstats **ptgidstats)
+{
+	*ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	*ptgidstats = NULL;
+	if (tgid_exit_send)
+		*ptgidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+}
+
/* Send pid data out on exit */
void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
			struct taskstats *tgidstats)
@@ -254,6 +264,7 @@ void taskstats_exit_send(struct task_str
	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);

+	/* Allocation should not depend on tgid_exit_send value */
	if (is_thread_group)
		size = 2 * size;	/* PID + STATS + TGID + STATS */

@@ -271,6 +282,9 @@ void taskstats_exit_send(struct task_str
			*tidstats);
	nla_nest_end(rep_skb, na);

+	/* Do not check tgid_exit_send value here. If it was unset during
+	 * taskstats_exit_alloc(), tgidstats will be NULL
+	 */
	if (!is_thread_group || !tgidstats) {
		send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
		goto ret;
@@ -345,3 +359,15 @@ err:
 * mechanisms precedes initialization of the taskstats interface
 */
late_initcall(taskstats_init);
+
+/* configuration through sysfs */
+ssize_t taskstats_tgid_exit_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", tgid_exit_send);
+}
+ssize_t taskstats_tgid_exit_store(struct subsystem *subsys, const char *page, size_t count)
+{
+	char *p = (char *)page;
+	tgid_exit_send = simple_strtoul(p, &p, 10);
+	return count;
+}
Index: linux-2.6.17-rc5-mm3/Documentation/accounting/taskstats.txt
===================================================================
--- linux-2.6.17-rc5-mm3.orig/Documentation/accounting/taskstats.txt	2006-06-07 12:03:14.000000000 -0400
+++ linux-2.6.17-rc5-mm3/Documentation/accounting/taskstats.txt	2006-06-09 02:35:07.000000000 -0400
@@ -32,13 +32,28 @@ The response contains statistics for a t
statistics for all tasks of the process (if tgid is specified).

To obtain statistics for tasks which are exiting, userspace opens a multicast
-netlink socket. Each time a task exits, two records are sent by the kernel to
-each listener on the multicast socket. The first the per-pid task's statistics
-and the second is the sum for all tasks of the process to which the task
-belongs (the task does not need to be the thread group leader). The need for
-per-tgid stats to be sent for each exiting task is explained in the per-tgid
-stats section below.
+netlink socket. Each time a task exits, its per-pid statistics are sent by
+the kernel to each listener on the multicast socket.

+If
+a) the value of /sys/kernel/taskstats_tgid_exit is non-zero and
+b) the task's thread_group has other members
+then a second record is also sent, consisting of the sum for all tasks of the
+thread group to which the task belongs. The task does not need to be the thread
+group leader. The utility for per-tgid stats to be sent for each exiting task
+is explained in the per-tgid stats section below.
+
+# echo 0 > /sys/kernel/taskstats_tgid_exit
+turns off sending of per-tgid stats on task exit
+
+# echo 1 > /sys/kernel/taskstats_tgid_exit
+turns it back on (which is the default)
+
+Commands requesting per-tgid stats are not affected by this configuration
+parameter and are always satisfied by the kernel. Also, when the last thread
+of a thread group, or a solitary thread exits, only the per-pid stats are sent
+since they are identical to the per-tgid stats at that point in time.
+
getdelays.c is a simple utility demonstrating usage of the taskstats interface
for reporting delay accounting statistics.

@@ -100,8 +115,8 @@ per-tgid stats

Taskstats provides per-process stats, in addition to per-task stats, since
resource management is often done at a process granularity and aggregating task
-stats in userspace alone is inefficient and potentially inaccurate (due to lack
-of atomicity).
+stats in userspace alone is inefficient and potentially inaccurate due to lack
+of atomicity.

However, maintaining per-process, in addition to per-task stats, within the
kernel has space and time overheads. Hence the taskstats implementation
@@ -115,9 +130,14 @@ statistic from the kernel.

The approach taken by taskstats is to return the per-tgid stats *each* time
a task exits, in addition to the per-pid stats for that task. Userspace can
-maintain task<->process mappings and use them to maintain the per-process stats
-in userspace, updating the aggregate appropriately as the tasks of a process
-exit.
+maintain task<->process mappings and use them to maintain the per-process
+stats, updating the aggregate appropriately as the tasks of a process
+exit. Userspace must also expect only per-pid stats to be sent when the last
+thread of a thread group exits (also when that is the only thread in the thread
+group, which is a common case).
+
+Installations that don't need per-tgid stats can disable their collection and
+sending on task exit as described in the Usage section.

Extending taskstats
-------------------

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux