[RFC PATCH 2/10] vfs: make mounts shared/slave/private

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch lets a mount be set up to
1. propagate mount events both ways (shared), or
2. only receive propagation (slave), or
3. neither receive nor forward propagation (private)

A shared mount can be bind-mounted to some mountpoint, and any mount/
umount event within either of these mounts propagates to the other mount.

Eg: 	mount --make-shared /mnt
	mount --bind /mnt /tmp

	The above two steps make the /mnt and /tmp propagation peers.

	mount /dev/sda0 /mnt/1 

	is mounted on both /mnt/1 as well as /tmp/1


A slave mount only receives mount/umount events from its master mount;
any mount/umount in the slave mount is not propagated to the
master.

Eg: 	Let's say /mnt and /tmp are peers (shared mounts)

	mount --make-slave /tmp

	The above step makes /tmp a slave of /mnt

	mount /dev/sda0 /mnt/1 

	is mounted on both /mnt/1 as well as /tmp/1

	but mount /dev/sda1 /tmp/1
	is mounted only on /tmp/1 and not on /mnt/1


A private mount does not receive or forward any mount/unmount event.
This is the default mount.


Signed by Ram Pai ([email protected])

 fs/Makefile           |    2 
 fs/namespace.c        |   50 +++++++++++++++++++++
 fs/pnode.c            |  116 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h    |    3 +
 include/linux/mount.h |   29 +++++++++---
 include/linux/pnode.h |   47 ++++++++++++++++++++
 6 files changed, 239 insertions(+), 8 deletions(-)

Index: 2.6.13.sharedsubtree/fs/namespace.c
===================================================================
--- 2.6.13.sharedsubtree.orig/fs/namespace.c
+++ 2.6.13.sharedsubtree/fs/namespace.c
@@ -20,10 +20,11 @@
 #include <linux/seq_file.h>
 #include <linux/namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
+#include <linux/pnode.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
 extern int __init init_rootfs(void);
 
@@ -60,10 +61,13 @@ struct vfsmount *alloc_vfsmnt(const char
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
 		INIT_LIST_HEAD(&mnt->mnt_expire);
+		INIT_LIST_HEAD(&mnt->mnt_share);
+		INIT_LIST_HEAD(&mnt->mnt_slave_list);
+		INIT_LIST_HEAD(&mnt->mnt_slave);
 		if (name) {
 			int size = strlen(name) + 1;
 			char *newname = kmalloc(size, GFP_KERNEL);
 			if (newname) {
 				memcpy(newname, name, size);
@@ -614,10 +618,54 @@ static int graft_tree(struct vfsmount *m
 		security_sb_post_addmount(mnt, nd);
 	return err;
 }
 
 /*
+ * change the propagation type of @nd's mount; MS_REC walks on via next_mnt().
+ */
+static int do_change_type(struct nameidata *nd, int flag)
+{
+	struct vfsmount *m, *mnt = nd->mnt;
+	int err = 0;
+	int recurse = flag & MS_REC;
+
+	if (nd->dentry != nd->mnt->mnt_root)
+		return -EINVAL;
+
+	down_write(&namespace_sem);
+	spin_lock(&vfsmount_lock);
+	switch (flag & ~MS_REC) {
+	case MS_SHARED:
+		for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
+			do_make_shared(m);
+		break;
+	case MS_SLAVE:
+		for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) {
+			err = do_make_slave(m);
+			if (err && m == mnt)
+				goto out;
+			else
+				err = 0;
+			/*
+			 * note: only a failure on the top mount 'mnt' is
+			 * reported. Mounts further down the tree need not be
+			 * shared, so their do_make_slave() errors are dropped.
+			 */
+		}
+		break;
+	case MS_PRIVATE:
+		for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
+			do_make_private(m);
+		break;
+	}
+      out:
+	spin_unlock(&vfsmount_lock);
+	up_write(&namespace_sem);
+	return err;
+}
+
+/*
  * do loopback mount.
  */
 static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 {
 	struct nameidata old_nd;
@@ -1051,10 +1099,12 @@ long do_mount(char *dev_name, char *dir_
 	if (flags & MS_REMOUNT)
 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
 		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE))
+		retval = do_change_type(&nd, flags);
 	else if (flags & MS_MOVE)
 		retval = do_move_mount(&nd, dev_name);
 	else
 		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
 				      dev_name, data_page);
Index: 2.6.13.sharedsubtree/fs/pnode.c
===================================================================
--- /dev/null
+++ 2.6.13.sharedsubtree/fs/pnode.c
@@ -0,0 +1,116 @@
+/*
+ *  linux/fs/pnode.c
+ *
+ * (C) Copyright IBM Corporation 2005.
+ *	Released under GPL v2.
+ *	Author : Ram Pai ([email protected])
+ *
+ */
+#include <linux/config.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/quotaops.h>
+#include <linux/acct.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/namespace.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/pnode.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <stdarg.h>
+
+/* spinlock for pnode related operations */
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
+
+/*
+ * make @mnt the slave of @master; a NULL @master leaves @mnt with no master
+ */
+static void make_slave_of(struct vfsmount *mnt, struct vfsmount *master)
+{
+	list_del_init(&mnt->mnt_slave);
+	if (master)
+		list_add(&mnt->mnt_slave, &master->mnt_slave_list);
+	mnt->mnt_master = master;
+}
+
+static int __do_make_slave(struct vfsmount *mnt)
+{
+	struct vfsmount *peer_mnt, *master = mnt->mnt_master;
+	struct vfsmount *slave_mnt, *t_slave_mnt;
+
+	peer_mnt = next_shared(mnt);
+	if (peer_mnt == mnt)
+		peer_mnt = NULL;
+
+	list_del_init(&mnt->mnt_share);
+	/*
+	 * first we will attempt to move 'mnt' and its slaves
+	 * under 'peer_mnt'. If that is not possible we will
+	 * try to move them under the 'master'. And if this
+	 * is also not possible then we make them all
+	 * independent (they can no longer be slaves).
+	 */
+	if (peer_mnt) {
+		if (master) {
+			/*
+			 * peer_mnt replaces mnt as the slave of master
+			 */
+			BUG_ON(peer_mnt->mnt_master);
+			BUG_ON(!list_empty(&peer_mnt->mnt_slave));
+			make_slave_of(peer_mnt, master);
+		}
+		master = peer_mnt;
+	}
+
+	list_for_each_entry_safe(slave_mnt, t_slave_mnt,
+				 &mnt->mnt_slave_list, mnt_slave)
+	    make_slave_of(slave_mnt, master);
+
+	make_slave_of(mnt, master);
+	CLEAR_MNT_SHARED(mnt);
+	INIT_LIST_HEAD(&mnt->mnt_slave_list);
+	return 0;
+}
+
+int do_make_shared(struct vfsmount *mnt)
+{
+	set_mnt_shared(mnt);
+	return 0;
+}
+
+int do_make_slave(struct vfsmount *mnt)
+{
+	int err = 0;
+
+	spin_lock(&vfspnode_lock);
+	if (!IS_MNT_SHARED(mnt)) {
+		err = -EINVAL;
+		goto out;
+	}
+	__do_make_slave(mnt);
+      out:
+	spin_unlock(&vfspnode_lock);
+	return err;
+}
+
+int do_make_private(struct vfsmount *mnt)
+{
+	/*
+	 * a private mount is nothing but a
+	 * slave mount with no incoming
+	 * propagations.
+	 */
+	spin_lock(&vfspnode_lock);
+	__do_make_slave(mnt);
+	list_del_init(&mnt->mnt_slave);
+	spin_unlock(&vfspnode_lock);
+	mnt->mnt_master = NULL;
+	set_mnt_private(mnt);
+	return 0;
+}
Index: 2.6.13.sharedsubtree/include/linux/fs.h
===================================================================
--- 2.6.13.sharedsubtree.orig/include/linux/fs.h
+++ 2.6.13.sharedsubtree/include/linux/fs.h
@@ -100,10 +100,13 @@ extern int dir_notify_enable;
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
 #define MS_MOVE		8192
 #define MS_REC		16384
 #define MS_VERBOSE	32768
+#define MS_PRIVATE	(1<<18)	/* change to private; MS_REC to recurse */
+#define MS_SLAVE	(1<<19)	/* change to slave; MS_REC to recurse */
+#define MS_SHARED	(1<<20)	/* change to shared; MS_REC to recurse */
 #define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
 /*
Index: 2.6.13.sharedsubtree/include/linux/pnode.h
===================================================================
--- /dev/null
+++ 2.6.13.sharedsubtree/include/linux/pnode.h
@@ -0,0 +1,47 @@
+/*
+ *  include/linux/pnode.h
+ *
+ * (C) Copyright IBM Corporation 2005.
+ *	Released under GPL v2.
+ *
+ */
+#ifndef _LINUX_PNODE_H
+#define _LINUX_PNODE_H
+
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+extern spinlock_t vfspnode_lock;
+
+static inline void set_mnt_shared(struct vfsmount *mnt)
+{
+	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+	mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SHARED;
+}
+
+static inline void set_mnt_private(struct vfsmount *mnt)
+{
+	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+}
+
+static inline struct vfsmount *next_shared(struct vfsmount *p)
+{
+	return list_entry(p->mnt_share.next, struct vfsmount, mnt_share);
+}
+
+static inline struct vfsmount *first_slave(struct vfsmount *p)
+{
+	return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave);
+}
+
+static inline struct vfsmount *next_slave(struct vfsmount *p)
+{
+	return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
+}
+
+int do_make_slave(struct vfsmount *);
+int do_make_shared(struct vfsmount *);
+int do_make_private(struct vfsmount *);
+#endif				/* _LINUX_PNODE_H */
Index: 2.6.13.sharedsubtree/include/linux/mount.h
===================================================================
--- 2.6.13.sharedsubtree.orig/include/linux/mount.h
+++ 2.6.13.sharedsubtree/include/linux/mount.h
@@ -15,16 +15,27 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
 
-#define MNT_NOSUID	1
-#define MNT_NODEV	2
-#define MNT_NOEXEC	4
+#define MNT_NOSUID	0x01
+#define MNT_NODEV	0x02
+#define MNT_NOEXEC	0x04
+#define MNT_SHARED	0x10	/* if the vfsmount is a shared mount */
+#define MNT_PNODE_MASK	0x30	/* propagation flag mask */
+
+#define IS_MNT_SHARED(mnt) ((mnt)->mnt_flags & MNT_SHARED)
+#define IS_MNT_SLAVE(mnt) (!list_empty(&(mnt)->mnt_slave))
+#define IS_MNT_PRIVATE(mnt) (!IS_MNT_SLAVE(mnt) && \
+			!((mnt)->mnt_flags & MNT_PNODE_MASK))
+#define IS_MNT_UNCLONABLE(mnt) ((mnt)->mnt_flags & MNT_UNCLONABLE)
+#define GET_MNT_TYPE(mnt) ((mnt)->mnt_flags & MNT_PNODE_MASK)
+#define SET_MNT_TYPE(mnt, type) ((mnt)->mnt_flags &= ~MNT_PNODE_MASK, \
+			(mnt)->mnt_flags |= ((type) & MNT_PNODE_MASK))
+#define CLEAR_MNT_SHARED(mnt) ((mnt)->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SHARED))
 
-struct vfsmount
-{
+struct vfsmount {
 	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
 	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
@@ -34,11 +45,15 @@ struct vfsmount
 	int mnt_flags;
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
-	struct namespace *mnt_namespace; /* containing namespace */
+	struct list_head mnt_share;	/* circular list of shared mounts */
+	struct list_head mnt_slave_list;	/* list of slave mounts */
+	struct list_head mnt_slave;	/* slave list entry */
+	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
+	struct namespace *mnt_namespace;/* containing namespace */
 };
 
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
 {
 	if (mnt)
@@ -78,6 +93,6 @@ extern void mark_mounts_for_expiry(struc
 
 extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
 #endif
-#endif /* _LINUX_MOUNT_H */
+#endif				/* _LINUX_MOUNT_H */
Index: 2.6.13.sharedsubtree/fs/Makefile
===================================================================
--- 2.6.13.sharedsubtree.orig/fs/Makefile
+++ 2.6.13.sharedsubtree/fs/Makefile
@@ -8,11 +8,11 @@
 obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o
+		ioprio.o pnode.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux