o Patch2 has updated cpusets documentation and the core update_cpu_domains
function
o I have also moved the dentry d_lock as discussed previously
diff -Naurp linux-2.6.12-rc4-mm1-1/Documentation/cpusets.txt linux-2.6.12-rc4-mm1-2/Documentation/cpusets.txt
--- linux-2.6.12-rc4-mm1-1/Documentation/cpusets.txt 2005-05-16 15:14:05.000000000 +0530
+++ linux-2.6.12-rc4-mm1-2/Documentation/cpusets.txt 2005-05-16 22:56:43.000000000 +0530
@@ -51,6 +51,14 @@ mems_allowed vector.
If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
ancestor or descendent, may share any of the same CPUs or Memory Nodes.
+A cpuset that is cpu exclusive has a sched domain associated with it.
+The sched domain consists of all cpus in the current cpuset that are not
+part of any exclusive child cpusets.
+This ensures that the scheduler load balacing code only balances
+against the cpus that are in the sched domain as defined above and not
+all of the cpus in the system. This removes any overhead due to
+load balancing code trying to pull tasks outside of the cpu exclusive
+cpuset only to be prevented by the tasks' cpus_allowed mask.
User level code may create and destroy cpusets by name in the cpuset
virtual file system, manage the attributes and permissions of these
@@ -84,6 +92,9 @@ This can be especially valuable on:
and a database), or
* NUMA systems running large HPC applications with demanding
performance characteristics.
+ * Also cpu-exclusive cpusets are useful for servers running orthogonal
+ workloads such as RT applications requiring low latency and HPC
+ applications that are throughput sensitive
These subsets, or "soft partitions" must be able to be dynamically
adjusted, as the job mix changes, without impacting other concurrently
@@ -125,6 +136,8 @@ Cpusets extends these two mechanisms as
- A cpuset may be marked exclusive, which ensures that no other
cpuset (except direct ancestors and descendents) may contain
any overlapping CPUs or Memory Nodes.
+ Also a cpu-exclusive cpuset would be associated with a sched
+ domain.
- You can list all the tasks (by pid) attached to any cpuset.
The implementation of cpusets requires a few, simple hooks
@@ -136,6 +149,9 @@ into the rest of the kernel, none in per
allowed in that tasks cpuset.
- in sched.c migrate_all_tasks(), to keep migrating tasks within
the CPUs allowed by their cpuset, if possible.
+ - in sched.c, a new API partition_sched_domains for handling
+ sched domain changes associated with cpu-exclusive cpusets
+ and related changes in both sched.c and arch/ia64/kernel/domain.c
- in the mbind and set_mempolicy system calls, to mask the requested
Memory Nodes by what's allowed in that tasks cpuset.
- in page_alloc, to restrict memory to allowed nodes.
diff -Naurp linux-2.6.12-rc4-mm1-1/kernel/cpuset.c linux-2.6.12-rc4-mm1-2/kernel/cpuset.c
--- linux-2.6.12-rc4-mm1-1/kernel/cpuset.c 2005-05-16 15:08:08.000000000 +0530
+++ linux-2.6.12-rc4-mm1-2/kernel/cpuset.c 2005-05-16 15:19:54.000000000 +0530
@@ -596,12 +596,62 @@ static int validate_change(const struct
return 0;
}
+/*
+ * For a given cpuset cur, partition the system as follows
+ * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
+ * exclusive child cpusets
+ * b. All cpus in the current cpuset's cpus_allowed that are not part of any
+ * exclusive child cpusets
+ * Build these two partitions by calling partition_sched_domains
+ */
+static void update_cpu_domains(struct cpuset *cur)
+{
+ struct cpuset *c, *par = cur->parent;
+ cpumask_t pspan, cspan;
+
+ if (par == NULL || cpus_empty(cur->cpus_allowed))
+ return;
+
+ /*
+ * Get all cpus from parent's cpus_allowed not part of exclusive
+ * children
+ */
+ pspan = par->cpus_allowed;
+ list_for_each_entry(c, &par->children, sibling) {
+ if (is_cpu_exclusive(c))
+ cpus_andnot(pspan, pspan, c->cpus_allowed);
+ }
+ if (is_removed(cur) || !is_cpu_exclusive(cur)) {
+ cpus_or(pspan, pspan, cur->cpus_allowed);
+ if (cpus_equal(pspan, cur->cpus_allowed))
+ return;
+ cspan = CPU_MASK_NONE;
+ }
+ else {
+ if (cpus_empty(pspan))
+ return;
+ cspan = cur->cpus_allowed;
+ /*
+ * Get all cpus from current cpuset's cpus_allowed not part
+ * of exclusive children
+ */
+ list_for_each_entry(c, &cur->children, sibling) {
+ if (is_cpu_exclusive(c))
+ cpus_andnot(cspan, cspan, c->cpus_allowed);
+ }
+ }
+
+ lock_cpu_hotplug();
+ partition_sched_domains(&pspan, &cspan);
+ unlock_cpu_hotplug();
+}
+
static int update_cpumask(struct cpuset *cs, char *buf)
{
- struct cpuset trialcs;
+ struct cpuset trialcs, oldcs;
int retval;
- trialcs = *cs;
+ trialcs = oldcs = *cs;
retval = cpulist_parse(buf, trialcs.cpus_allowed);
if (retval < 0)
return retval;
@@ -609,9 +659,13 @@ static int update_cpumask(struct cpuset
if (cpus_empty(trialcs.cpus_allowed))
return -ENOSPC;
retval = validate_change(cs, &trialcs);
- if (retval == 0)
- cs->cpus_allowed = trialcs.cpus_allowed;
- return retval;
+ if (retval < 0)
+ return retval;
+ cs->cpus_allowed = trialcs.cpus_allowed;
+ if (is_cpu_exclusive(cs) &&
+ (!cpus_equal(cs->cpus_allowed, oldcs.cpus_allowed)))
+ update_cpu_domains(cs);
+ return 0;
}
static int update_nodemask(struct cpuset *cs, char *buf)
@@ -646,25 +700,28 @@ static int update_nodemask(struct cpuset
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
{
int turning_on;
- struct cpuset trialcs;
+ struct cpuset trialcs, oldcs;
int err;
turning_on = (simple_strtoul(buf, NULL, 10) != 0);
- trialcs = *cs;
+ trialcs = oldcs = *cs;
if (turning_on)
set_bit(bit, &trialcs.flags);
else
clear_bit(bit, &trialcs.flags);
err = validate_change(cs, &trialcs);
- if (err == 0) {
- if (turning_on)
- set_bit(bit, &cs->flags);
- else
- clear_bit(bit, &cs->flags);
- }
- return err;
+ if (err < 0)
+ return err;
+ if (turning_on)
+ set_bit(bit, &cs->flags);
+ else
+ clear_bit(bit, &cs->flags);
+
+ if (is_cpu_exclusive(cs) != is_cpu_exclusive(&oldcs))
+ update_cpu_domains(cs);
+ return 0;
}
static int attach_task(struct cpuset *cs, char *buf)
@@ -1310,12 +1367,14 @@ static int cpuset_rmdir(struct inode *un
up(&cpuset_sem);
return -EBUSY;
}
- spin_lock(&cs->dentry->d_lock);
parent = cs->parent;
set_bit(CS_REMOVED, &cs->flags);
+ if (is_cpu_exclusive(cs))
+ update_cpu_domains(cs);
list_del(&cs->sibling); /* delete my sibling from parent->children */
if (list_empty(&parent->children))
check_for_release(parent);
+ spin_lock(&cs->dentry->d_lock);
d = dget(cs->dentry);
cs->dentry = NULL;
spin_unlock(&d->d_lock);
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]