Re: [RFC][PATCH 0/20] Multiple instances of the process id namespace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hubertus Franke <[email protected]> writes:

> Eric W. Biederman wrote:
>> Hubertus Franke <[email protected]> writes:
>>
>>
>>>find_task_by_pid( pid ) { return find_task_pidspace_by_pid ( current->pspace,
>>>pid ); }
>>>
>>>and then only deal with the exceptional cases using find_task_pidspace_by_pid
>>>when the pidspace is different..
>> That is a possibility.  However I want to break some eggs so that the
>> users are updated appropriately.  It is only by a strenuous act of
>> will that I don't change the type of pid,tgid,pgrp,session.
>> The size of the changes is much less important than being clear.
>> So for I want find_task_by_pid to be an absolute interface.
>>
>
> Fair enough, valid answers .. I checked the patch and it would only take
> 19/33 instances out .. so not the end of the world.
>
>>
>>>>  Does the use of clone to create a new namespace instance look
>>>>  like the sane approach?
>>>>
>>>
>>>At he surface it looks OK .. how does this work in a multi-threaded
>>>process which does cloen ( CLONE_NPSPACE ) ?
>>>We discussed at some point that exec is the right place to do it,
>>>but what I get is that because this is the container_init task
>>>we are OK !
>>>A bit clarification would help here ...
>> Well the parent doesn't much matter.  But the child must have a fresh
>> start on all the groups of processes.  As all other groupings known by
>> a pid are per pspace, so they can't cross that line.
>>
>
> Now, on which kernel does this compile/work ?

2.6.latest plus a few patches I have already sent off to Andrew.

> Do you have a "helper" program you can share that starts/exec's an
> app under a new container (uhmm, namespace). No point for us to
> actually write that..

Ok here is my little helper/tester program.  Not beautiful but
it should work.

Eric


/* gcc -Wall -O2 -g chpid.c -o chpid */
#define _XOPEN_SOURCE
#define _XOPEN_SOURCE_EXTENDED
#define _SVID_SOURCE
#define _GNU_SOURCE
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/mount.h>
#include <sys/vfs.h>
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <stdarg.h>
#include <dirent.h>

#ifndef MNT_FORCE
#define MNT_FORCE	0x00000001	/* Attempt to forcibily umount */
#endif /* MNT_FORCE */
#ifndef MNT_DETACH
#define MNT_DETACH	0x00000002	/* Just detach from the tree */
#endif /* MNT_DETACH */
#ifndef MNT_EXPIRE
#define MNT_EXPIRE	0x00000004	/* Mark for expiry */
#endif /* MNT_EXPIRE */

#ifndef MS_MOVE
#define MS_MOVE 8192
#endif
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef CLONE_NPSPACE
#define CLONE_NPSPACE	0x04000000	/* New process space */
#endif


#ifndef PROC_SUPER_MAGIC
#define PROC_SUPER_MAGIC 0x9fa0
#endif /* PROC_SUPER_MAGIC */

struct user_desc;
static pid_t raw_clone(int flags, void *child_stack, 
	int *parent_tidptr, struct user_desc *newtls, int *child_tidptr)
{
	return syscall(__NR_clone, flags, child_stack, parent_tidptr, newtls, child_tidptr);
}

static int raw_pivot_root(const char *new_root, const char *old_root)
{
	return syscall(__NR_pivot_root, new_root, old_root);
}

static void (*my_exit)(int status) = exit;

static void die(char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fflush(stderr);
	fflush(stdout);
	my_exit(1);
}

static void *xmalloc(size_t size)
{
	void *ptr;
	ptr = malloc(size);
	if (!ptr) die("malloc of %d bytes failed: %s\n", size, strerror(errno));
	return ptr;
}

int main(int argc, char **argv, char **envp)
{
	pid_t pid;
	int status;
	struct rlimit rlim;
	int clone_flags;
	char **cmd_argv, *shell_argv[2];
	char *root = "/", *old = "/mnt";
	int i;
	int tty, tty_force;
	
	tty = 0;
	tty_force = 0;
	clone_flags = CLONE_NPSPACE | SIGCHLD;

	for (i = 1; (i < argc) && (argv[i][0] == '-'); i++) {
		if (strcmp(argv[i], "--") == 0) {
			break;
		}
		else if (((argc - i) >= 2) && (strcmp(argv[i], "-r") == 0)) {
			clone_flags |= CLONE_NEWNS;
			root = argv[i + 1];
			i++;
		}
		else if (((argc - i) >= 2) && (strcmp(argv[i], "-o") == 0)) {
			old = argv[i + 1];
			i++;
		}
		else if (strcmp(argv[i], "-n") == 0) {
			clone_flags |= CLONE_NEWNS;
		}
		else if (strcmp(argv[i], "--tty") == 0) {
			tty = 1;
		}
		else if (strcmp(argv[i], "--tty-force") == 0) {
			tty = 1; tty_force = 1;
		}
		else {
			die("Bad argument %s\n", argv[i]);
		}
	}
	cmd_argv = argv + i;
	if (cmd_argv[0] == NULL) {
		cmd_argv = shell_argv;
		shell_argv[0] = getenv("SHELL");
		shell_argv[1] = NULL;
	}
	if (cmd_argv[0] == NULL) {
		die("No command specified\n");
	}
#if 1
	fprintf(stderr, "cmd_argv: %s\n", cmd_argv[0]);
#endif
	if (root[0] != '/') {
		die("root path: '%s' not absolute\n", root);
	}
	if (old[0] != '/') {
		die("old path: '%s' not absolute\n", old);
	}
	
	pid = raw_clone(clone_flags, NULL, NULL, NULL, NULL);
	if (pid < 0) {
		fprintf(stderr, "clone_failed: pid: %d %d:%s\n", 
			pid, errno, strerror(errno));
		exit(2);
	}
	if (pid == 0) {
		/* In the child */
		int result;
		my_exit = _exit;

		/* FIXME allocate a process inside for controlling the new process space */

		fprintf(stderr, "pid: %d, ppid: %d pgrp: %d sid: %d\n",
			getpid(), getppid(), getpgid(0), getsid(0));
		/* If CLONE_NPSPACE isn't implemented exit */
		if (getpid() != 1) 
			die("CLONE_NPSPACE not implemented\n");
		if (clone_flags & CLONE_NEWNS) {
			struct statfs stfs;
			if (strcmp(root, "/") != 0) {
				char put_old[PATH_MAX];
				result = snprintf(put_old, sizeof(put_old), "%s%s", root, old);
				if (result >= sizeof(put_old))
					die("path name to long\n");
				if (result < 0)
					die("snprintf failed: %d:%s\n",
						errno, strerror(errno));
				
				/* Ensure I have a mount point at the directory I want to export */
				result = mount(root, root, NULL, MS_BIND | MS_REC, NULL);
				if (result < 0)
					die("bind of '%s' failed: %d:%s\n",
						root, errno, strerror(errno));
				
				/* Switch the mount points */
				result = raw_pivot_root(root, put_old);
				if (result < 0)
					die("pivot_root('%s', '%s') failed: %d:%s\n",
						root, put_old, errno, strerror(errno));
				
				/* Unmount all of the old mounts */
				result = umount2(old, MNT_DETACH);
				if (result < 0)
					die("umount2 of '%s' failed: %d:%s\n",
						put_old, errno, strerror(errno));
			}

			result = statfs("/proc", &stfs);
			if ((result == 0) && (stfs.f_type == PROC_SUPER_MAGIC))  {
				/* Unmount and remount proc so it reflects the new pid space */
				result = umount2("/proc", 0);
				if (result < 0) 
					die("umount failed: %d:%s\n", errno, strerror(errno));
				
				result = mount("proc", "/proc", "proc", 0, NULL);
				if (result < 0)
					die("mount failed: %d:%s\n",
						errno, strerror(errno));
			}
		}
		if (tty) {
			pid_t sid, pgrp;
			sid = setsid();
			if (sid < 0)
				die("setsid failed: %d:%s\n",
					errno, strerror(errno));
			fprintf(stderr, "pid: %d, ppid: %d pgrp: %d sid: %d\n",
				getpid(), getppid(), getpgid(0), getsid(0));

			result = ioctl(STDIN_FILENO, TIOCSCTTY, tty_force);
			if (result < 0)
				die("tiocsctty failed: %d:%s\n",
					errno, strerror(errno));

			pgrp = tcgetpgrp(STDIN_FILENO);
			
			fprintf(stderr, "pgrp: %d\n", pgrp);

			fprintf(stderr, "pid: %d, ppid: %d pgrp: %d sid: %d\n",
				getpid(), getppid(), getpgid(0), getsid(0));

		}
		result = execve(cmd_argv[0], cmd_argv, envp);
		die("execve of %s failed: %d:%s\n",
			cmd_argv[0], errno, strerror(errno));
	}
	/* In the parent */
	fprintf(stderr, "child pid: %d\n", pid);
	pid = waitpid(pid, &status, 0);
	fprintf(stderr, "pid: %d exited status: %d\n", 
		pid, status);
	if (pid < 0) {
		fprintf(stderr, "waitpid failed: %d %s\n",
			errno, strerror(errno));
		exit(9);
	}
	if (pid == 0) {
		fprintf(stderr, "waitpid returned no pid!\n");
		exit(10);
	}
	if (WIFEXITED(status)) {
		fprintf(stderr, "pid: %d exited: %d\n",
			pid, WEXITSTATUS(status));
	}
	if (WIFSIGNALED(status)) {
		fprintf(stderr, "pid: %d exited with a uncaught signal: %d %s\n",
			pid, WTERMSIG(status), strsignal(WTERMSIG(status)));
	}
	if (WIFSTOPPED(status)) {
		fprintf(stderr, "pid: %d stopped with signal: %d\n", 
			pid, WSTOPSIG(status));
	}
	return 0;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux