rcu-refcount stacker performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On July 8 I sent out a patch which re-implemented the rcu-refcounting
of the LSM list in stacker for the sake of supporting safe security
module unloading.  (patch reattached here for convenience)  Here are
some performance results with and without that patch.  Tests were run
on a 16-way ppc64 machine.  Dbench was run 50 times, and kernbench
and reaim were run 10 times, and intervals are 95% confidence half-
intervals.

These results seem pretty poor.  I'm now wondering whether this is
really necessary.  David Wheeler's original stacker had an option
of simply waiting a while after a module was taken out of the list
of active modules before freeing the modules.  Something like that
is of course one option.  I'm hoping we can also take advantage of
some already known module state info to be a little less coarse
about it.  For instance, sys_delete_module() sets m->state to
MODULE_STATE_GOING before calling mod->exit().  If in place of
doing atomic_inc(&m->use), stacker skipped the m->hook() if
m->state!=MODULE_STATE_LIVE, then it may be safe to assume that
any m->hook() should be finished before sys_delete_module() gets
to free_module(mod).  This seems to require adding a struct
module argument to security/security:mod_reg_security() so an LSM
can pass itself along.

So I'll try that next.  Hopefully by avoiding the potential cache
line bounces which atomic_inc(&m->use) brings, this should provide
far better performance.

thanks,
-serge

Dbench (throughput, larger is better)
--------------------------------------------
plain stacker:    1531.448400 +/- 15.791116
stacker with rcu: 1408.056200 +/- 12.597277

Kernbench (runtime, smaller is better)
--------------------------------------------
plain stacker:    52.341000  +/- 0.184995
stacker with rcu: 53.722000 +/- 0.161473

Reaim (numjobs, larger is better) (gnuplot-friendly format)
plain stacker:
----------------------------------------------------------
Numforked   jobs/minute         95% CI
1           106662.857000     5354.267865
3           301628.571000     6297.121934
5           488142.858000     16031.685536
7           673200.000000     23994.030784
9           852428.570000     31485.607271
11          961714.290000     0.000000
13          1108157.144000    27287.525982
15          1171178.571000    49790.796869

Reaim (numjobs, larger is better) (gnuplot-friendly format)
stacker with rcu:
----------------------------------------------------------
Numforked   jobs/minute         95% CI
1           100542.857000     2099.040645
3           266657.139000     6297.121934
5           398892.858000     12023.765252
7           467670.000000     14911.383385
9           418648.352000     11665.751441
11          396825.000000     8700.115252
13          357480.912000     7567.947838
15          337571.428000     2332.267703

Patch:

Index: linux-2.6.12/security/stacker.c
===================================================================
--- linux-2.6.12.orig/security/stacker.c	2005-07-08 13:43:15.000000000 -0500
+++ linux-2.6.12/security/stacker.c	2005-07-08 16:21:54.000000000 -0500
@@ -33,13 +33,13 @@
 
 struct module_entry {
 	struct list_head lsm_list;  /* list of active lsms */
-	struct list_head all_lsms; /* list of all lsms */
 	char *module_name;
 	int namelen;
 	struct security_operations module_operations;
+	struct rcu_head m_rcu;
+	atomic_t use;
 };
 static struct list_head stacked_modules;  /* list of stacked modules */
-static struct list_head all_modules;  /* list of all modules, including freed */
 
 static short sysfsfiles_registered;
 
@@ -84,6 +84,32 @@ MODULE_PARM_DESC(debug, "Debug enabled o
  * We return as soon as an error is returned.
  */
 
+static inline void stacker_free_module(struct module_entry *m)
+{
+	kfree(m->module_name);
+	kfree(m);
+}
+
+/*
+ * Version of stacker_free_module called from call_rcu
+ */
+static void free_mod_fromrcu(struct rcu_head *head)
+{
+	struct module_entry *m;
+
+	m = container_of(head, struct module_entry, m_rcu);
+	stacker_free_module(m);
+}
+
+static void stacker_del_module(struct rcu_head *head)
+{
+	struct module_entry *m;
+	
+	m = container_of(head, struct module_entry, m_rcu);
+	if (atomic_dec_and_test(&m->use))
+		stacker_free_module(m);
+}
+
 #define stack_for_each_entry(pos, head, member)				\
 	for (pos = list_entry((head)->next, typeof(*pos), member);	\
 		&pos->member != (head);					\
@@ -93,16 +119,27 @@ MODULE_PARM_DESC(debug, "Debug enabled o
 /* to make this safe for module deletion, we would need to
  * add a reference count to m as we had before
  */
+/*
+ * XXX We can't quite do this - we delete the module before we grab
+ * m->next?
+ * We could just do a call_rcu.  Then the call_rcu happens in same
+ * rcu cycle has dereference, so module won't be deleted until the
+ * next cycle.
+ * That's what I'm going to do.
+ */
 #define RETURN_ERROR_IF_ANY_ERROR(BASE_FUNC, FUNC_WITH_ARGS) do { \
 	int result = 0; \
 	struct module_entry *m; \
 	rcu_read_lock(); \
 	stack_for_each_entry(m, &stacked_modules, lsm_list) { \
-		if (!m->module_operations.BASE_FUNC) \
-			continue; \
-		rcu_read_unlock(); \
-		result = m->module_operations.FUNC_WITH_ARGS; \
-		rcu_read_lock(); \
+		if (m->module_operations.BASE_FUNC) { \
+			atomic_inc(&m->use); \
+			rcu_read_unlock(); \
+			result = m->module_operations.FUNC_WITH_ARGS; \
+			rcu_read_lock(); \
+			if (unlikely(atomic_dec_and_test(&m->use))) \
+				call_rcu(&m->m_rcu, free_mod_fromrcu); \
+		} \
 		if (result) \
 			break; \
 	} \
@@ -116,9 +153,12 @@ MODULE_PARM_DESC(debug, "Debug enabled o
 	rcu_read_lock(); \
 	stack_for_each_entry(m, &stacked_modules, lsm_list) { \
 		if (m->module_operations.BASE_FUNC) { \
+			atomic_inc(&m->use); \
 			rcu_read_unlock(); \
 			m->module_operations.FUNC_WITH_ARGS; \
 			rcu_read_lock(); \
+			if (unlikely(atomic_dec_and_test(&m->use))) \
+				call_rcu(&m->m_rcu, free_mod_fromrcu); \
 		} \
 	} \
 	rcu_read_unlock(); \
@@ -129,38 +169,47 @@ MODULE_PARM_DESC(debug, "Debug enabled o
 	rcu_read_lock(); \
 	stack_for_each_entry(m, &stacked_modules, lsm_list ) { \
 		if (m->module_operations.BASE_FREE) { \
+			atomic_inc(&m->use); \
 			rcu_read_unlock(); \
 			m->module_operations.FREE_WITH_ARGS; \
 			rcu_read_lock(); \
+			if (unlikely(atomic_dec_and_test(&m->use))) \
+				call_rcu(&m->m_rcu, free_mod_fromrcu); \
 		} \
 	} \
 	rcu_read_unlock(); \
 } while (0)
 
 #define ALLOC_SECURITY(BASE_FUNC,FUNC_WITH_ARGS,BASE_FREE,FREE_WITH_ARGS) do { \
-	int result; \
+	int result = 0; \
 	struct module_entry *m, *m2; \
 	rcu_read_lock(); \
 	stack_for_each_entry(m, &stacked_modules, lsm_list) { \
-		if (!m->module_operations.BASE_FUNC) \
-			continue; \
-		rcu_read_unlock(); \
-		result = m->module_operations.FUNC_WITH_ARGS; \
-		rcu_read_lock(); \
+		if (m->module_operations.BASE_FUNC) { \
+			atomic_inc(&m->use); \
+			rcu_read_unlock(); \
+			result = m->module_operations.FUNC_WITH_ARGS; \
+			rcu_read_lock(); \
+			if (unlikely(atomic_dec_and_test(&m->use))) \
+				call_rcu(&m->m_rcu, free_mod_fromrcu); \
+		} \
 		if (result) \
 			goto bad; \
 	} \
 	rcu_read_unlock(); \
 	return 0; \
 bad: \
-	stack_for_each_entry(m2, &all_modules, all_lsms) { \
+	stack_for_each_entry(m2, &stacked_modules, lsm_list) { \
 		if (m == m2) \
 			break; \
 		if (!m2->module_operations.BASE_FREE) \
 			continue; \
+		atomic_inc(&m2->use); \
 		rcu_read_unlock(); \
 		m2->module_operations.FREE_WITH_ARGS; \
 		rcu_read_lock(); \
+		if (unlikely(atomic_dec_and_test(&m2->use))) \
+			call_rcu(&m2->m_rcu, free_mod_fromrcu); \
 	} \
 	rcu_read_unlock(); \
 	return result; \
@@ -251,10 +300,16 @@ static int stacker_vm_enough_memory(long
 
 	rcu_read_lock();
 	stack_for_each_entry(m, &stacked_modules, lsm_list) {
-		if (!m->module_operations.vm_enough_memory)
+		if (!m->module_operations.vm_enough_memory) {
 			continue;
+		}
+		atomic_inc(&m->use);
 		rcu_read_unlock();
 		result = m->module_operations.vm_enough_memory(pages);
+		rcu_read_lock();
+		if (unlikely(atomic_dec_and_test(&m->use)))
+			stacker_free_module(m);
+		rcu_read_unlock();
 		return result;
 	}
 	rcu_read_unlock();
@@ -281,9 +336,12 @@ static int stacker_netlink_send (struct 
 		if (!m->module_operations.netlink_send)
 			continue;
 		NETLINK_CB(skb).eff_cap = ~0;
+		atomic_inc(&m->use);
 		rcu_read_unlock();
 		result = m->module_operations.netlink_send(sk, skb);
 		rcu_read_lock();
+		if (unlikely(atomic_dec_and_test(&m->use)))
+			call_rcu(&m->m_rcu, free_mod_fromrcu);
 		tmpcap &= NETLINK_CB(skb).eff_cap;
 		if (result)
 			break;
@@ -987,33 +1045,42 @@ stacker_getprocattr(struct task_struct *
 	stack_for_each_entry(m, &stacked_modules, lsm_list) {
 		if (!m->module_operations.getprocattr)
 			continue;
+		atomic_inc(&m->use);
 		rcu_read_unlock();
 		ret = m->module_operations.getprocattr(p, name,
 					value+len, size-len);
 		rcu_read_lock();
-		if (ret == -EINVAL)
-			continue;
-		found_noneinval = 1;
-		if (ret < 0) {
+		if (ret > 0) {
+			found_noneinval = 1;
+			len += ret;
+			if (len+m->namelen+4 < size) {
+				char *v = value;
+				if (v[len-1]=='\n')
+					len--;
+				len += sprintf(value+len, " (%s)\n",
+							m->module_name);
+			}
+		} else if (ret != -EINVAL) {
+			found_noneinval = 1;
 			memset(value, 0, len);
 			len = ret;
+		} else
+			ret = 0;
+
+		if (unlikely(atomic_dec_and_test(&m->use)))
+			call_rcu(&m->m_rcu, free_mod_fromrcu);
+
+		if (ret < 0)
 			break;
-		}
-		if (ret == 0)
-			continue;
-		len += ret;
-		if (len+m->namelen+4 < size) {
-			char *v = value;
-			if (v[len-1]=='\n')
-				len--;
-			len += sprintf(value+len, " (%s)\n", m->module_name);
-		}
 	}
 	rcu_read_unlock();
 
 	return found_noneinval ? len : -EINVAL;
 }
 
+/*
+ * find an lsm by name.  If found, increment its use count and return it
+ */
 static struct module_entry *
 find_active_lsm(const char *name, int len)
 {
@@ -1022,6 +1089,7 @@ find_active_lsm(const char *name, int le
 	rcu_read_lock();
 	stack_for_each_entry(m, &stacked_modules, lsm_list) {
 		if (m->namelen == len && !strncmp(m->module_name, name, len)) {
+			atomic_inc(&m->use);
 			ret = m;
 			break;
 		}
@@ -1043,6 +1111,7 @@ stacker_setprocattr(struct task_struct *
 	char *realv = (char *)value;
 	size_t dsize = size;
 	int loc = 0, end_data = size;
+	int ret, free_module = 0;
 
 	if (list_empty(&stacked_modules))
 		return -EINVAL;
@@ -1063,7 +1132,7 @@ stacker_setprocattr(struct task_struct *
 	callm = find_active_lsm(realv+loc+1, dsize-loc-1);
 	if (!callm)
 		goto call;
-
+	free_module = 1;
 
 	loc--;
 	while (loc && realv[loc]==' ')
@@ -1074,8 +1143,14 @@ call:
 	if (!callm || !callm->module_operations.setprocattr)
 		return -EINVAL;
 
-	return callm->module_operations.setprocattr(p, name, value, end_data) +
+	ret = callm->module_operations.setprocattr(p, name, value, end_data) +
 			(size-end_data);
+
+	if (free_module && atomic_dec_and_test(&callm->use))
+		stacker_free_module(callm);
+
+	return ret;
+
 }
 
 /*
@@ -1116,15 +1191,15 @@ static int stacker_register (const char 
 	new_module_entry->module_name = new_module_name;
 	new_module_entry->namelen = namelen;
 
+	atomic_set(&new_module_entry->use, 1);
+
 	INIT_LIST_HEAD(&new_module_entry->lsm_list);
-	INIT_LIST_HEAD(&new_module_entry->all_lsms);
 
 	rcu_read_lock();
 	if (!modules_registered) {
 		modules_registered++;
 		list_del_rcu(&default_module.lsm_list);
 	}
-	list_add_tail_rcu(&new_module_entry->all_lsms, &all_modules);
 	list_add_tail_rcu(&new_module_entry->lsm_list, &stacked_modules);
 	if (strcmp(name, "selinux") == 0)
 		selinux_module = new_module_entry;
@@ -1141,16 +1216,60 @@ out:
 }
 
 /*
- * Currently this version of stacker does not allow for module
- * unregistering.
- * Easy way to allow for this is using rcu ref counting like an older
- * version of stacker did.
- * Another way would be to force stacker_unregister to sleep between
- * removing the module from all_modules and free_modules and unloading it.
+ * find_lsm_module_by_name:
+ * Find a module by name.  Used by stacker_unregister.  Called with
+ * stacker spinlock held.
+ */
+static struct module_entry *
+find_lsm_with_namelen(const char *name, int len)
+{
+	struct module_entry *m, *ret = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(m, &stacked_modules, lsm_list) {
+		atomic_inc(&m->use);
+		rcu_read_unlock();
+		if (m->namelen == len && !strncmp(m->module_name, name, len))
+			ret = m;
+		rcu_read_lock();
+		if (unlikely(atomic_dec_and_test(&m->use)))
+			call_rcu(&m->m_rcu, free_mod_fromrcu);
+		if (ret)
+			break;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
  */
 static int stacker_unregister (const char *name, struct security_operations *ops)
 {
-	return -EPERM;
+	struct module_entry *m;
+	int len = strnlen(name, MAX_MODULE_NAME_LEN);
+	int ret = 0;
+
+	spin_lock(&stacker_lock);
+	m = find_lsm_with_namelen(name, len);
+
+	if (!m) {
+		printk(KERN_INFO "%s: could not find module %s.\n",
+				__FUNCTION__, name);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	list_del_rcu(&m->lsm_list);
+
+	if (strcmp(m->module_name, "selinux") == 0)
+		selinux_module = NULL;
+	call_rcu(&m->m_rcu, stacker_del_module);
+
+out:
+	spin_unlock(&stacker_lock);
+
+	return ret;
 }
 
 static struct security_operations stacker_ops = {
@@ -1407,57 +1526,6 @@ static struct stacker_attribute stacker_
 	.show = listmodules_read,
 };
 
-/* respond to a request to unload a module */
-static ssize_t stacker_unload_write (struct stacker_kobj *obj, const char *name,
-					size_t count)
-{
-	struct module_entry *m;
-	int len = strnlen(name, MAX_MODULE_NAME_LEN);
-	int ret = count;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (count <= 0)
-		return -EINVAL;
-
-	if (!modules_registered)
-		return -EINVAL;
-
-	spin_lock(&stacker_lock);
-	m = find_active_lsm(name, len);
-
-	if (!m) {
-		printk(KERN_INFO "%s: could not find module %s.\n",
-			__FUNCTION__, name);
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (strcmp(m->module_name, "selinux") == 0)
-		selinux_module = NULL;
-
-	rcu_read_lock();
-	list_del_rcu(&m->lsm_list);
-	if (list_empty(&stacked_modules)) {
-		INIT_LIST_HEAD(&default_module.lsm_list);
-		list_add_tail_rcu(&default_module.lsm_list, &stacked_modules);
-		modules_registered = 0;
-	}
-	rcu_read_unlock();
-
-out:
-	spin_unlock(&stacker_lock);
-
-	return ret;
-}
-
-static struct stacker_attribute stacker_attr_unload = {
-	.attr = {.name = "unload", .mode = S_IFREG | S_IRUGO | S_IWUSR},
-	.store = stacker_unload_write,
-};
-
-
 /* stop responding to sysfs */
 static ssize_t stop_responding_write (struct stacker_kobj *obj,
 					const char *buff, size_t count)
@@ -1483,7 +1551,6 @@ static void unregister_sysfs_files(void)
 	sysfs_remove_file(kobj, &stacker_attr_lockdown.attr);
 	sysfs_remove_file(kobj, &stacker_attr_listmodules.attr);
 	sysfs_remove_file(kobj, &stacker_attr_stop_responding.attr);
-	sysfs_remove_file(kobj, &stacker_attr_unload.attr);
 
 	sysfsfiles_registered = 0;
 }
@@ -1506,8 +1573,6 @@ static int register_sysfs_files(void)
 			&stacker_attr_listmodules.attr);
 	sysfs_create_file(&stacker_subsys.kset.kobj,
 			&stacker_attr_stop_responding.attr);
-	sysfs_create_file(&stacker_subsys.kset.kobj,
-			&stacker_attr_unload.attr);
 	sysfsfiles_registered = 1;
 	stacker_dbg("sysfs files registered\n");
 	return 0;
@@ -1524,13 +1589,13 @@ static int __init stacker_init (void)
 	sysfsfiles_registered = 0;
 
 	INIT_LIST_HEAD(&stacked_modules);
-	INIT_LIST_HEAD(&all_modules);
 	spin_lock_init(&stacker_lock);
 	default_module.module_name = DEFAULT_MODULE_NAME;
 	default_module.namelen = strlen(DEFAULT_MODULE_NAME);
 	memcpy(&default_module.module_operations, &dummy_security_ops,
 			sizeof(struct security_operations));
 	INIT_LIST_HEAD(&default_module.lsm_list);
+	atomic_set(&default_module.use, 1);
 	list_add_tail(&default_module.lsm_list, &stacked_modules);
 
 	if (register_security (&stacker_ops)) {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux