[RFC][PATCH 2/4] Aggregated beancounters infrastructure

Add support for aggregated beancounters. An aggregated beancounter groups
several independent beancounters together; beancounters now hash into their
aggregator's buckets instead of a single global hash. The default aggregator
is init_ab (analogous to init_bc). Aggregated beancounters are created with
no limits imposed on them. A caller-side sketch of the new interfaces follows
the diffstat below.

Signed-off-by: Balbir Singh <[email protected]>
---

 include/bc/beancounter.h |   48 +++++++-
 kernel/bc/beancounter.c  |  272 ++++++++++++++++++++++++++++++++++++++++-------
 kernel/bc/misc.c         |    2 
 3 files changed, 280 insertions(+), 42 deletions(-)
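
For reference, a minimal caller-side sketch of the new interfaces (illustrative
only, not part of the patch): ab_findcreate() returns an aggregator with a
reference held, and beancounter_create() finds or creates a beancounter inside
it, pinning the aggregator until the last put_beancounter(). The function name
and the ids used below are made up for the example.

/*
 * Illustrative sketch only, not part of the patch.  The function name and
 * the ids (1000, 42) are made up; the calls match the interfaces added by
 * this patch.
 */
#include <bc/beancounter.h>

static int example_charge_under_aggregator(void)
{
	struct aggr_beancounter *ab;
	struct beancounter *bc;

	/* Look up aggregator 1000, creating it (with no limits) if needed. */
	ab = ab_findcreate(1000, BC_ALLOC);
	if (ab == NULL)
		return -ENOMEM;

	/* Find or create beancounter 42 inside that aggregator. */
	bc = beancounter_create(ab, 42);
	if (bc == NULL) {
		put_aggr_beancounter(ab);
		return -ENOMEM;
	}

	/* ... bc_charge()/bc_uncharge() against bc as before ... */

	put_beancounter(bc);		/* last put also releases bc's reference on ab */
	put_aggr_beancounter(ab);	/* drop the ab_findcreate() reference */
	return 0;
}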

diff -puN include/bc/beancounter.h~add-aggr-bc include/bc/beancounter.h
--- linux-2.6.18-rc5/include/bc/beancounter.h~add-aggr-bc	2006-09-17 20:33:15.000000000 +0530
+++ linux-2.6.18-rc5-balbir/include/bc/beancounter.h	2006-09-17 20:33:15.000000000 +0530
@@ -18,6 +18,13 @@
 
 #define BC_RESOURCES	3
 
+struct ab_resource_parm {
+	unsigned long barrier;
+	unsigned long held;
+	unsigned long limit;	/* hard resource limit */
+	unsigned long failcnt;	/* count of failed charges */
+};
+
 struct bc_resource_parm {
 	unsigned long barrier;	/* A barrier over which resource allocations
 				 * are failed gracefully. e.g. if the amount
@@ -51,6 +58,27 @@ struct bc_resource_parm {
  */
 #define BC_MAGIC                0x62756275UL
 
+#define AB_HASH_BITS		8
+#define AB_HASH_SIZE		(1 << AB_HASH_BITS)
+#define ab_hash_fn(bcid)	(hash_long(bcid, AB_HASH_BITS))
+
+#define BC_HASH_BITS		5
+#define BC_HASH_SIZE		(1 << BC_HASH_BITS)
+#define bc_hash_fn(bcid)	(hash_long(bcid, BC_HASH_BITS))
+
+
+/*
+ * Aggregate beancounters - group independent BC's together to form groups
+ */
+struct aggr_beancounter {
+	struct ab_resource_parm	ab_parms[BC_RESOURCES];
+	atomic_t		ab_refcount;
+	struct hlist_head 	ab_bucket[AB_HASH_SIZE];
+	spinlock_t 		ab_lock;
+	struct hlist_node	hash;
+	bcid_t			ab_id;
+};
+
 /*
  *	Resource management structures
  * Serialization issues:
@@ -73,6 +101,7 @@ struct beancounter {
 #endif
 	/* resources statistics and settings */
 	struct bc_resource_parm	bc_parms[BC_RESOURCES];
+	struct aggr_beancounter *ab;
 };
 
 enum bc_severity { BC_BARRIER, BC_LIMIT, BC_FORCE };
@@ -118,7 +147,20 @@ int __must_check bc_charge(struct beanco
 void bc_uncharge_locked(struct beancounter *bc, int res, unsigned long val);
 void bc_uncharge(struct beancounter *bc, int res, unsigned long val);
 
-struct beancounter *beancounter_findcreate(bcid_t id, int mask);
+struct beancounter *beancounter_create(struct aggr_beancounter *ab, bcid_t id);
+struct beancounter *beancounter_find_locked(struct aggr_beancounter *ab,
+						bcid_t id);
+struct aggr_beancounter *ab_findcreate(bcid_t id, int mask);
+struct beancounter *beancounter_relocate(struct aggr_beancounter *dst_ab,
+						struct aggr_beancounter *src_ab,
+						bcid_t id);
+
+static inline struct aggr_beancounter *
+get_aggr_beancounter(struct aggr_beancounter *ab)
+{
+	atomic_inc(&ab->ab_refcount);
+	return ab;
+}
 
 static inline struct beancounter *get_beancounter(struct beancounter *bc)
 {
@@ -127,6 +169,7 @@ static inline struct beancounter *get_be
 }
 
 void put_beancounter(struct beancounter *bc);
+void put_aggr_beancounter(struct aggr_beancounter *ab);
 
 void bc_init_early(void);
 void bc_init_late(void);
@@ -139,7 +182,8 @@ extern const char *bc_rnames[];
 
 #define nr_beancounters 0
 
-#define beancounter_findcreate(id, f)			(NULL)
+#define beancounter_create(ab, id)			(NULL)
+#define beancounter_find_locked(ab, id)			(NULL)
 #define get_beancounter(bc)				(NULL)
 #define put_beancounter(bc)				do { } while (0)
 
diff -puN kernel/bc/beancounter.c~add-aggr-bc kernel/bc/beancounter.c
--- linux-2.6.18-rc5/kernel/bc/beancounter.c~add-aggr-bc	2006-09-17 20:33:15.000000000 +0530
+++ linux-2.6.18-rc5-balbir/kernel/bc/beancounter.c	2006-09-17 20:33:15.000000000 +0530
@@ -14,11 +14,17 @@
 #include <bc/vmrss.h>
 
 static kmem_cache_t *bc_cachep;
+static kmem_cache_t *ab_cachep;
 static struct beancounter default_beancounter;
 
-static void init_beancounter_struct(struct beancounter *bc, bcid_t id);
+static void init_beancounter_struct(struct aggr_beancounter *ab,
+					struct beancounter *bc, bcid_t id);
+static void init_aggr_beancounter_struct(struct aggr_beancounter *ab,
+						bcid_t id);
+static void init_aggr_beancounter_parm_nolimits(struct aggr_beancounter *ab);
 
-struct beancounter init_bc;
+struct beancounter 	init_bc;
+struct aggr_beancounter init_ab;
 
 unsigned int nr_beancounters = 1;
 
@@ -28,12 +34,64 @@ const char *bc_rnames[] = {
 	"privvmpages",
 };
 
-#define BC_HASH_BITS		8
-#define BC_HASH_SIZE		(1 << BC_HASH_BITS)
+/*
+ * Aggregated beancounters are stored in ab_hash bucket
+ */
+static struct hlist_head 	ab_hash[AB_HASH_SIZE];
+spinlock_t 			ab_hash_lock;
+
+struct aggr_beancounter *ab_findcreate(bcid_t id, int mask)
+{
+	struct aggr_beancounter *new_ab, *ab;
+	unsigned long flags;
+	struct hlist_head *slot;
+	struct hlist_node *pos;
+
+	/*
+	 * Assumption: This function is called with a reference to ab held
+	 */
+	slot = &ab_hash[ab_hash_fn(id)];
+	new_ab = NULL;
+
+retry:
+	spin_lock_irqsave(&ab_hash_lock, flags);
+	hlist_for_each_entry (ab, pos, slot, hash)
+		if (ab->ab_id == id)
+			break;
 
-static struct hlist_head bc_hash[BC_HASH_SIZE];
-static spinlock_t bc_hash_lock;
-#define bc_hash_fn(bcid)	(hash_long(bcid, BC_HASH_BITS))
+	if (pos != NULL) {
+		get_aggr_beancounter(ab);
+		spin_unlock_irqrestore(&ab_hash_lock, flags);
+
+		if (new_ab != NULL)
+			kmem_cache_free(ab_cachep, new_ab);
+		return ab;
+	}
+
+	if (new_ab != NULL)
+		goto out_install;
+
+	spin_unlock_irqrestore(&ab_hash_lock, flags);
+
+	if (!(mask & BC_ALLOC))
+		goto out;
+
+	new_ab = kmem_cache_alloc(ab_cachep,
+			mask & BC_ALLOC_ATOMIC ? GFP_ATOMIC : GFP_KERNEL);
+	if (new_ab == NULL)
+		goto out;
+
+	init_aggr_beancounter_struct(new_ab, id);
+	init_aggr_beancounter_parm_nolimits(new_ab);
+	goto retry;
+
+out_install:
+	hlist_add_head(&new_ab->hash, slot);
+	nr_beancounters++;
+	spin_unlock_irqrestore(&ab_hash_lock, flags);
+out:
+	return new_ab;
+}
 
 /*
  *	Per resource beancounting. Resources are tied to their bc id.
@@ -47,63 +105,159 @@ static spinlock_t bc_hash_lock;
  *	will mean the old entry is still around with resource tied to it.
  */
 
-struct beancounter *beancounter_findcreate(bcid_t id, int mask)
+struct beancounter *beancounter_find_locked(struct aggr_beancounter *ab,
+						bcid_t id)
 {
-	struct beancounter *new_bc, *bc;
-	unsigned long flags;
+	struct beancounter *bc;
 	struct hlist_head *slot;
 	struct hlist_node *pos;
 
-	slot = &bc_hash[bc_hash_fn(id)];
-	new_bc = NULL;
+	/*
+	 * Assumption: This function is called with a reference to ab held
+	 */
+	slot = &ab->ab_bucket[bc_hash_fn(id)];
 
-retry:
-	spin_lock_irqsave(&bc_hash_lock, flags);
 	hlist_for_each_entry (bc, pos, slot, hash)
 		if (bc->bc_id == id)
 			break;
 
 	if (pos != NULL) {
 		get_beancounter(bc);
-		spin_unlock_irqrestore(&bc_hash_lock, flags);
+		return bc;
+	}
+	else
+		return NULL;
+}
+
+struct beancounter *beancounter_create(struct aggr_beancounter *ab, bcid_t id)
+{
+	unsigned long flags;
+	struct beancounter *bc = NULL, *new_bc;
+	struct hlist_head *slot;
+
+	get_aggr_beancounter(ab);
+	spin_lock_irqsave(&ab->ab_lock, flags);
+	bc = beancounter_find_locked(ab, id);
+	spin_unlock_irqrestore(&ab->ab_lock, flags);
 
-		if (new_bc != NULL)
-			kmem_cache_free(bc_cachep, new_bc);
+	if (bc)
 		return bc;
+	else {
+		new_bc = kmem_cache_alloc(bc_cachep, GFP_KERNEL);
+		*new_bc = default_beancounter;
+		init_beancounter_struct(ab, new_bc, id);
 	}
 
-	if (new_bc != NULL)
-		goto out_install;
+	spin_lock_irqsave(&ab->ab_lock, flags);
+	bc = beancounter_find_locked(ab, id);
+	if (unlikely(bc)) {
+		spin_unlock_irqrestore(&ab->ab_lock, flags);
+		kmem_cache_free(bc_cachep, new_bc);
+		return bc;
+	}
+	slot = &ab->ab_bucket[bc_hash_fn(id)];
+	hlist_add_head(&new_bc->hash, slot);
+	spin_unlock_irqrestore(&ab->ab_lock, flags);
+	return new_bc;
+}
 
-	spin_unlock_irqrestore(&bc_hash_lock, flags);
+static void double_ab_lock(struct aggr_beancounter *ab1,
+				struct aggr_beancounter *ab2,
+				unsigned long *flags)
+{
+	if (ab1 > ab2) {
+		spin_lock_irqsave(&ab1->ab_lock, *flags);
+		spin_lock(&ab2->ab_lock);
+	} else if (ab2 > ab1) {
+		spin_lock_irqsave(&ab2->ab_lock, *flags);
+		spin_lock(&ab1->ab_lock);
+	} else
+		BUG();
+}
 
-	if (!(mask & BC_ALLOC))
+static void double_ab_unlock(struct aggr_beancounter *ab1,
+				struct aggr_beancounter *ab2,
+				unsigned long *flags)
+{
+	if (ab1 > ab2) {
+		spin_unlock(&ab2->ab_lock);
+		spin_unlock_irqrestore(&ab1->ab_lock, *flags);
+	} else if (ab2 > ab1) {
+		spin_unlock(&ab1->ab_lock);
+		spin_unlock_irqrestore(&ab2->ab_lock, *flags);
+	} else
+		BUG();
+}
+
+/*
+ * This function should be called with a reference to dst_ab held
+ */
+struct beancounter *beancounter_relocate(struct aggr_beancounter *dst_ab,
+						struct aggr_beancounter *src_ab,
+						bcid_t id)
+{
+	unsigned long flags;
+	struct beancounter *bc = NULL, *new_bc;
+	struct hlist_head *slot;
+
+	double_ab_lock(dst_ab, src_ab, &flags);
+	bc = beancounter_find_locked(src_ab, id);
+	if (!bc)
 		goto out;
 
-	new_bc = kmem_cache_alloc(bc_cachep,
-			mask & BC_ALLOC_ATOMIC ? GFP_ATOMIC : GFP_KERNEL);
-	if (new_bc == NULL)
+	/*
+	 * Ideally this should be a BUG if new_bc is found.
+	 * But we allow a small margin for several threads migrating
+	 * to a new aggregated beancounter. BUG() it later.
+	 */
+	new_bc = beancounter_find_locked(dst_ab, id);
+	if (new_bc) {
+		bc = new_bc;
 		goto out;
+	}
 
-	*new_bc = default_beancounter;
-	init_beancounter_struct(new_bc, id);
-	goto retry;
+	spin_lock(&bc->bc_lock);
+	hlist_del(&bc->hash);
+	slot = &dst_ab->ab_bucket[bc_hash_fn(id)];
+	hlist_add_head(&bc->hash, slot);
+	bc->ab = dst_ab;
+	spin_unlock(&bc->bc_lock);
 
-out_install:
-	hlist_add_head(&new_bc->hash, slot);
-	nr_beancounters++;
-	spin_unlock_irqrestore(&bc_hash_lock, flags);
 out:
-	return new_bc;
+	put_aggr_beancounter(src_ab);
+	double_ab_unlock(dst_ab, src_ab, &flags);
+	return bc;
+}
+
+void put_aggr_beancounter(struct aggr_beancounter *ab)
+{
+	int i;
+	unsigned long flags;
+
+	if (!atomic_dec_and_lock_irqsave(&ab->ab_refcount,
+				&ab_hash_lock, flags))
+		return;
+
+	for (i = 0; i < BC_RESOURCES; i++)
+		if (ab->ab_parms[i].held != 0)
+			printk("AB: %d has %lu of %s held on put", ab->ab_id,
+				ab->ab_parms[i].held, bc_rnames[i]);
+
+	hlist_del(&ab->hash);
+	nr_beancounters--;
+	spin_unlock_irqrestore(&ab_hash_lock, flags);
+
+	kmem_cache_free(ab_cachep, ab);
 }
 
 void put_beancounter(struct beancounter *bc)
 {
 	int i;
 	unsigned long flags;
+	struct aggr_beancounter *ab = bc->ab;
 
 	if (!atomic_dec_and_lock_irqsave(&bc->bc_refcount,
-				&bc_hash_lock, flags))
+				&ab->ab_lock, flags))
 		return;
 
 	BUG_ON(bc == &init_bc);
@@ -123,9 +277,10 @@ void put_beancounter(struct beancounter 
 #endif
 	hlist_del(&bc->hash);
 	nr_beancounters--;
-	spin_unlock_irqrestore(&bc_hash_lock, flags);
+	spin_unlock_irqrestore(&ab->ab_lock, flags);
 
 	kmem_cache_free(bc_cachep, bc);
+	put_aggr_beancounter(ab);
 }
 
 EXPORT_SYMBOL_GPL(put_beancounter);
@@ -228,12 +383,38 @@ EXPORT_SYMBOL_GPL(bc_uncharge);
  *	of fields not initialized explicitly.
  */
 
-static void init_beancounter_struct(struct beancounter *bc, bcid_t id)
+static void init_beancounter_struct(struct aggr_beancounter *ab,
+					struct beancounter *bc, bcid_t id)
 {
 	bc->bc_magic = BC_MAGIC;
 	atomic_set(&bc->bc_refcount, 1);
 	spin_lock_init(&bc->bc_lock);
 	bc->bc_id = id;
+	bc->ab = ab;
+}
+
+static void init_aggr_beancounter_struct(struct aggr_beancounter *ab,
+						bcid_t ab_id)
+{
+	int i;
+
+	spin_lock_init(&ab->ab_lock);
+	ab->ab_id = ab_id;
+	atomic_set(&ab->ab_refcount, 1);
+	for (i = 0; i < AB_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&ab->ab_bucket[i]);
+	INIT_HLIST_NODE(&ab->hash);
+}
+
+static void init_aggr_beancounter_parm_nolimits(struct aggr_beancounter *ab)
+{
+	int k;
+
+	for (k = 0; k < BC_RESOURCES; k++) {
+		ab->ab_parms[k].held = 0;
+		ab->ab_parms[k].limit = BC_MAXVALUE;
+		ab->ab_parms[k].barrier = BC_MAXVALUE;
+	}
 }
 
 static void init_beancounter_nolimits(struct beancounter *bc)
@@ -261,17 +442,24 @@ static void init_beancounter_syslimits(s
 void __init bc_init_early(void)
 {
 	struct beancounter *bc;
+	struct aggr_beancounter *ab;
 	struct hlist_head *slot;
 
 	bc = &init_bc;
+	ab = &init_ab;
 
 	init_beancounter_nolimits(bc);
-	init_beancounter_struct(bc, 0);
+	init_aggr_beancounter_struct(ab, 0);
+	init_aggr_beancounter_parm_nolimits(ab);
+	init_beancounter_struct(ab, bc, 0);
 
-	spin_lock_init(&bc_hash_lock);
-	slot = &bc_hash[bc_hash_fn(bc->bc_id)];
+	slot = &ab->ab_bucket[bc_hash_fn(bc->bc_id)];
 	hlist_add_head(&bc->hash, slot);
 
+	spin_lock_init(&ab_hash_lock);
+	slot = &ab_hash[ab_hash_fn(ab->ab_id)];
+	hlist_add_head(&ab->hash, slot);
+
 	current->task_bc.exec_bc = get_beancounter(bc);
 	current->task_bc.fork_bc = get_beancounter(bc);
 }
@@ -279,12 +467,18 @@ void __init bc_init_early(void)
 void __init bc_init_late(void)
 {
 	struct beancounter *bc;
+	struct aggr_beancounter *ab;
 
 	bc_cachep = kmem_cache_create("beancounters",
 			sizeof(struct beancounter), 0,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
 
+	ab_cachep = kmem_cache_create("aggr_beancounters",
+			sizeof(struct aggr_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+
+	ab = &init_ab;
 	bc = &default_beancounter;
 	init_beancounter_syslimits(bc);
-	init_beancounter_struct(bc, 0);
+	init_beancounter_struct(ab, bc, 0);
 }
diff -puN kernel/bc/misc.c~add-aggr-bc kernel/bc/misc.c
--- linux-2.6.18-rc5/kernel/bc/misc.c~add-aggr-bc	2006-09-17 20:33:15.000000000 +0530
+++ linux-2.6.18-rc5-balbir/kernel/bc/misc.c	2006-09-17 20:33:15.000000000 +0530
@@ -16,7 +16,7 @@ void bc_task_charge(struct task_struct *
 	struct beancounter *bc;
 
 	new_bc = &new->task_bc;
-	bc = beancounter_findcreate(new->tgid, BC_ALLOC);
+	bc = beancounter_create(parent->task_bc.exec_bc->ab, new->tgid);
 	if (!bc) {
 		printk(KERN_WARNING "failed to create bc %d for tgid %d\n",
 			bc->bc_id, new->tgid);
_
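
And a hedged sketch of relocation between aggregators (again illustrative, not
part of the patch): beancounter_relocate() takes both ab_locks in pointer order
via double_ab_lock(), rehashes the beancounter into the destination and drops
the reference it held on the source aggregator. The group id below is
arbitrary, and the treatment of the destination reference is an assumption
based on the comment above beancounter_relocate() and on put_beancounter()
releasing a reference on bc->ab.

/*
 * Illustrative sketch only, not part of the patch.  Moves beancounter `id'
 * out of src_ab into a (possibly new) aggregator.  The group id 2000 is
 * arbitrary.
 */
static struct beancounter *example_move_to_group(struct aggr_beancounter *src_ab,
						 bcid_t id)
{
	struct aggr_beancounter *dst_ab;
	struct beancounter *bc;

	/* beancounter_relocate() expects a reference to dst_ab to be held. */
	dst_ab = ab_findcreate(2000, BC_ALLOC);
	if (dst_ab == NULL)
		return NULL;

	/*
	 * Both ab_locks are taken in pointer order (double_ab_lock()); the
	 * beancounter is rehashed into dst_ab and the reference it held on
	 * src_ab is dropped.
	 */
	bc = beancounter_relocate(dst_ab, src_ab, id);
	if (bc == NULL) {
		/* id was not present under src_ab */
		put_aggr_beancounter(dst_ab);
		return NULL;
	}

	/*
	 * Assumption: the ab_findcreate() reference on dst_ab is inherited
	 * by the relocated beancounter (put_beancounter() later releases a
	 * reference on bc->ab), so it is not dropped here.
	 */
	return bc;	/* returned with the lookup reference held */
}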

-- 

	Balbir Singh,
	Linux Technology Center,
	IBM Software Labs