[patch] removes MAX_ARG_PAGES

A while back, I sent out a preliminary patch
(http://thread.gmane.org/gmane.linux.ports.hppa/752) to remove the
MAX_ARG_PAGES limit on command-line sizes.  Since then, Peter Zijlstra
and I have fixed a number of bugs and addressed the outstanding issues.
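
To make the old limit concrete: with 4kB pages, MAX_ARG_PAGES (32)
capped the combined argument and environment strings at roughly 128kB.
Here is a minimal user-space sketch (mine, for illustration only; it
assumes 4kB pages and that /bin/true exists) that trips the old limit
but works with the patch applied:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define NARGS 64	/* 64 args x 4kB each = 256kB, twice the old cap */

int main(void)
{
	char *argv[NARGS + 2];
	char *arg = malloc(4096);
	int i;

	if (!arg)
		return 1;
	memset(arg, 'x', 4095);
	arg[4095] = '\0';

	argv[0] = "/bin/true";
	for (i = 1; i <= NARGS; i++)
		argv[i] = arg;
	argv[NARGS + 1] = NULL;

	execv("/bin/true", argv);
	/* Only reached on failure; expect E2BIG on unpatched kernels. */
	printf("execv: %s\n", strerror(errno));
	return 1;
}

Note that the patch still caps each individual string at MAX_ARG_STRLEN
(32 pages), so one single 256kB argument is rejected either way; only
the total is now bounded by the stack rlimit instead of a fixed array.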

The attached patch incorporates the following changes:

- Fixes a BUG_ON() assertion failure discovered by Ingo Molnar.
- Adds CONFIG_STACK_GROWSUP (parisc) support.
- Adds auditing support.
- Reverts to the old behavior on architectures with no MMU.
- Fixes broken execution of 64-bit binaries from 32-bit binaries.
- Adds elf_fdpic support.
- Fixes cache coherency bugs.

We've tested the following architectures: i386, x86_64, um/i386,
parisc, and frv.  These cover the scenarios this patch addresses, but
maintainers of other architectures should try it out to make sure there
are no unexpected gotchas.
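
For anyone who wants a quick smoke test, here's a hypothetical stress
program (assuming the default 8MB stack rlimit): it execs /bin/true
with a quarter of a million arguments, which an unpatched kernel
rejects with E2BIG at the count() stage:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NARGS (256 * 1024)	/* ~512kB of strings, well past 128kB */

static char *args[NARGS + 2];

int main(void)
{
	int i;

	args[0] = "/bin/true";
	for (i = 1; i <= NARGS; i++)
		args[i] = "x";
	args[NARGS + 1] = NULL;

	execv("/bin/true", args);
	/* Only reached on failure. */
	printf("execv: %s\n", strerror(errno));
	return 1;
}

On a patched kernel the exec should succeed, with the argument pages
accounted against the stack rlimit rather than a fixed 32-page array.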

Ollie
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 26f15c4..022043e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -60,8 +60,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 		goto good_area;
 	else if(!(vma->vm_flags & VM_GROWSDOWN))
 		goto out;
-	else if(is_user && !ARCH_IS_STACKGROW(address))
-		goto out;
 	else if(expand_stack(vma, address))
 		goto out;
 
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 185399b..a2731fc 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -232,9 +232,6 @@ do {							\
 #define load_elf_binary load_elf32_binary
 
 #define ELF_PLAT_INIT(r, load_addr)	elf32_init(r)
-#define setup_arg_pages(bprm, stack_top, exec_stack) \
-	ia32_setup_arg_pages(bprm, stack_top, exec_stack)
-int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack);
 
 #undef start_thread
 #define start_thread(regs,new_rip,new_rsp) do { \
@@ -289,55 +286,7 @@ static void elf32_init(struct pt_regs *regs)
 int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
 			 int executable_stack)
 {
-	unsigned long stack_base;
-	struct vm_area_struct *mpnt;
-	struct mm_struct *mm = current->mm;
-	int i, ret;
-
-	stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
-	mm->arg_start = bprm->p + stack_base;
-
-	bprm->p += stack_base;
-	if (bprm->loader)
-		bprm->loader += stack_base;
-	bprm->exec += stack_base;
-
-	mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!mpnt) 
-		return -ENOMEM; 
-
-	down_write(&mm->mmap_sem);
-	{
-		mpnt->vm_mm = mm;
-		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
-		mpnt->vm_end = stack_top;
-		if (executable_stack == EXSTACK_ENABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS |  VM_EXEC;
-		else if (executable_stack == EXSTACK_DISABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
-		else
-			mpnt->vm_flags = VM_STACK_FLAGS;
- 		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
- 			PAGE_COPY_EXEC : PAGE_COPY;
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
-		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
-	} 
-
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page *page = bprm->page[i];
-		if (page) {
-			bprm->page[i] = NULL;
-			install_arg_page(mpnt, page, stack_base);
-		}
-		stack_base += PAGE_SIZE;
-	}
-	up_write(&mm->mmap_sem);
-	
-	return 0;
+	return setup_arg_pages(bprm, stack_top, executable_stack);
 }
 EXPORT_SYMBOL(ia32_setup_arg_pages);
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9cc4f0a..fa0cf77 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -254,8 +254,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		size_t len;
 		if (__put_user((elf_addr_t)p, argv++))
 			return -EFAULT;
-		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
-		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+		if (!len || len > MAX_ARG_STRLEN)
 			return 0;
 		p += len;
 	}
@@ -266,8 +266,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		size_t len;
 		if (__put_user((elf_addr_t)p, envp++))
 			return -EFAULT;
-		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
-		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+		if (!len || len > MAX_ARG_STRLEN)
 			return 0;
 		p += len;
 	}
@@ -777,10 +777,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	}
 
 	/* OK, This is the point of no return */
-	current->mm->start_data = 0;
-	current->mm->end_data = 0;
-	current->mm->end_code = 0;
-	current->mm->mmap = NULL;
 	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
 
@@ -985,9 +981,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
-	create_elf_tables(bprm, &loc->elf_ex,
+	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  (interpreter_type == INTERPRETER_AOUT),
 			  load_addr, interp_load_addr);
+	if (retval < 0) {
+		send_sig(SIGSEGV, current, 0);
+		goto out;
+	}
 	/* N.B. passed_fileno might not be initialized? */
 	if (interpreter_type == INTERPRETER_AOUT)
 		current->mm->arg_start += strlen(passed_fileno) + 1;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f3ddca4..f3dc3ca 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -622,8 +622,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	p = (char __user *) current->mm->arg_start;
 	for (loop = bprm->argc; loop > 0; loop--) {
 		__put_user((elf_caddr_t) p, argv++);
-		len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
-		if (!len || len > PAGE_SIZE * MAX_ARG_PAGES)
+		len = strnlen_user(p, MAX_ARG_STRLEN);
+		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
@@ -634,8 +634,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	current->mm->env_start = (unsigned long) p;
 	for (loop = bprm->envc; loop > 0; loop--) {
 		__put_user((elf_caddr_t)(unsigned long) p, envp++);
-		len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
-		if (!len || len > PAGE_SIZE * MAX_ARG_PAGES)
+		len = strnlen_user(p, MAX_ARG_STRLEN);
+		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e6f5799..aa9ae99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -126,7 +126,9 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto _ret;
 
 	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
-		remove_arg_zero(bprm);
+		retval = remove_arg_zero(bprm);
+		if (retval)
+			goto _ret;
 	}
 
 	if (fmt->flags & MISC_FMT_OPEN_BINARY) {
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1edbcca..5b6309f 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -68,7 +68,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 	 * This is done in reverse order, because of how the
 	 * user environment and arguments are stored.
 	 */
-	remove_arg_zero(bprm);
+	retval = remove_arg_zero(bprm);
+	if (retval)
+		return retval;
 	retval = copy_strings_kernel(1, &bprm->interp, bprm);
 	if (retval < 0) return retval; 
 	bprm->argc++;
diff --git a/fs/compat.c b/fs/compat.c
index 72e5e69..5279745 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1391,6 +1391,7 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
 {
 	struct page *kmapped_page = NULL;
 	char *kaddr = NULL;
+	unsigned long kpos = 0;
 	int ret;
 
 	while (argc-- > 0) {
@@ -1399,92 +1400,84 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
 		unsigned long pos;
 
 		if (get_user(str, argv+argc) ||
-			!(len = strnlen_user(compat_ptr(str), bprm->p))) {
+		    !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
 			ret = -EFAULT;
 			goto out;
 		}
 
-		if (bprm->p < len)  {
+		if (MAX_ARG_STRLEN < len) {
 			ret = -E2BIG;
 			goto out;
 		}
 
-		bprm->p -= len;
-		/* XXX: add architecture specific overflow check here. */
+		/* We're going to work our way backwards. */
 		pos = bprm->p;
+		str += len;
+		bprm->p -= len;
 
 		while (len > 0) {
-			int i, new, err;
 			int offset, bytes_to_copy;
-			struct page *page;
 
 			offset = pos % PAGE_SIZE;
-			i = pos/PAGE_SIZE;
-			page = bprm->page[i];
-			new = 0;
-			if (!page) {
-				page = alloc_page(GFP_HIGHUSER);
-				bprm->page[i] = page;
-				if (!page) {
-					ret = -ENOMEM;
+			if (offset == 0)
+				offset = PAGE_SIZE;
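+			/* 'offset' is now the room left below 'pos' in its
+			 * page; copying at most that much keeps each pass
+			 * within a single page. */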
+
+			bytes_to_copy = offset;
+			if (bytes_to_copy > len)
+				bytes_to_copy = len;
+
+			offset -= bytes_to_copy;
+			pos -= bytes_to_copy;
+			str -= bytes_to_copy;
+			len -= bytes_to_copy;
+
+			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+				struct page *page;
+
+#ifdef CONFIG_STACK_GROWSUP
+				ret = expand_downwards(bprm->vma, pos);
+				if (ret < 0) {
+					/* We've exceeded the stack rlimit. */
+					ret = -E2BIG;
+					goto out;
+				}
+#endif
+				ret = get_user_pages(current, bprm->mm, pos,
+						     1, 1, 1, &page, NULL);
+				if (ret <= 0) {
+					/* We've exceeded the stack rlimit. */
+					ret = -E2BIG;
 					goto out;
 				}
-				new = 1;
-			}
 
-			if (page != kmapped_page) {
-				if (kmapped_page)
+				if (kmapped_page) {
+					flush_kernel_dcache_page(kmapped_page);
 					kunmap(kmapped_page);
+					put_page(kmapped_page);
+				}
 				kmapped_page = page;
 				kaddr = kmap(kmapped_page);
+				kpos = pos & PAGE_MASK;
+				flush_cache_page(bprm->vma, kpos,
+						 page_to_pfn(kmapped_page));
 			}
-			if (new && offset)
-				memset(kaddr, 0, offset);
-			bytes_to_copy = PAGE_SIZE - offset;
-			if (bytes_to_copy > len) {
-				bytes_to_copy = len;
-				if (new)
-					memset(kaddr+offset+len, 0,
-						PAGE_SIZE-offset-len);
-			}
-			err = copy_from_user(kaddr+offset, compat_ptr(str),
-						bytes_to_copy);
-			if (err) {
+			if (copy_from_user(kaddr+offset, compat_ptr(str),
+						bytes_to_copy)) {
 				ret = -EFAULT;
 				goto out;
 			}
-
-			pos += bytes_to_copy;
-			str += bytes_to_copy;
-			len -= bytes_to_copy;
 		}
 	}
 	ret = 0;
 out:
-	if (kmapped_page)
+	if (kmapped_page) {
+		flush_kernel_dcache_page(kmapped_page);
 		kunmap(kmapped_page);
-	return ret;
-}
-
-#ifdef CONFIG_MMU
-
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
-	int i;
-
-	for (i = 0; i < MAX_ARG_PAGES; i++) {
-		if (bprm->page[i])
-			__free_page(bprm->page[i]);
-		bprm->page[i] = NULL;
+		put_page(kmapped_page);
 	}
+	return ret;
 }
 
-#endif /* CONFIG_MMU */
-
 /*
  * compat_do_execve() is mostly a copy of do_execve(), with the exception
  * that it processes 32 bit argv and envp pointers.
@@ -1497,7 +1490,6 @@ int compat_do_execve(char * filename,
 	struct linux_binprm *bprm;
 	struct file *file;
 	int retval;
-	int i;
 
 	retval = -ENOMEM;
 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1511,24 +1503,19 @@ int compat_do_execve(char * filename,
 
 	sched_exec();
 
-	bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
 	bprm->file = file;
 	bprm->filename = filename;
 	bprm->interp = filename;
-	bprm->mm = mm_alloc();
-	retval = -ENOMEM;
-	if (!bprm->mm)
-		goto out_file;
 
-	retval = init_new_context(current, bprm->mm);
-	if (retval < 0)
-		goto out_mm;
+	retval = bprm_mm_init(bprm);
+	if (retval)
+		goto out_file;
 
-	bprm->argc = compat_count(argv, bprm->p / sizeof(compat_uptr_t));
+	bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
 		goto out_mm;
 
-	bprm->envc = compat_count(envp, bprm->p / sizeof(compat_uptr_t));
+	bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
 	if ((retval = bprm->envc) < 0)
 		goto out_mm;
 
@@ -1553,10 +1540,8 @@ int compat_do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
 	retval = search_binary_handler(bprm, regs);
 	if (retval >= 0) {
-		free_arg_pages(bprm);
-
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
@@ -1565,19 +1550,12 @@ int compat_do_execve(char * filename,
 	}
 
 out:
-	/* Something went wrong, return the inode and free the argument pages*/
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page * page = bprm->page[i];
-		if (page)
-			__free_page(page);
-	}
-
 	if (bprm->security)
 		security_bprm_free(bprm);
 
 out_mm:
 	if (bprm->mm)
-		mmdrop(bprm->mm);
+		mmput(bprm->mm);
 
 out_file:
 	if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 3155e91..8637630 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -53,6 +53,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
+#include <asm/tlb.h>
 
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
@@ -173,6 +174,153 @@ exit:
 	goto out;
 }
 
+#ifdef CONFIG_MMU
+
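+/*
+ * Fault in and pin the page backing the new stack at 'pos'.  With
+ * 'write' set the page is created if not yet present; under
+ * CONFIG_STACK_GROWSUP the temporary stack vma must first be grown
+ * downwards by hand, since no fault handler will do that for us.
+ */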
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+		int write)
+{
+	struct page *page;
+	int ret;
+
+#ifdef CONFIG_STACK_GROWSUP
+	if (write) {
+		ret = expand_downwards(bprm->vma, pos);
+		if (ret < 0)
+			return NULL;
+	}
+#endif
+	ret = get_user_pages(current, bprm->mm, pos,
+			1, write, 1, &page, NULL);
+	if (ret <= 0)
+		return NULL;
+
+	return page;
+}
+
+static void put_arg_page(struct page *page)
+{
+	put_page(page);
+}
+
+static void free_arg_page(struct linux_binprm *bprm, int i)
+{
+}
+
+static void free_arg_pages(struct linux_binprm *bprm)
+{
+}
+
+#else
+
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+		int write)
+{
+	struct page *page;
+
+	page = bprm->page[pos / PAGE_SIZE];
+	if (!page && write) {
+		page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
+		if (!page)
+			return NULL;
+		bprm->page[pos / PAGE_SIZE] = page;
+	}
+
+	return page;
+}
+
+static void put_arg_page(struct page *page)
+{
+}
+
+static void free_arg_page(struct linux_binprm *bprm, int i)
+{
+	if (bprm->page[i]) {
+		__free_page(bprm->page[i]);
+		bprm->page[i] = NULL;
+	}
+}
+
+static void free_arg_pages(struct linux_binprm *bprm)
+{
+	int i;
+
+	for (i = 0; i < MAX_ARG_PAGES; i++)
+		free_arg_page(bprm, i);
+}
+
+#endif /* CONFIG_MMU */
+
+/* Create a new mm_struct and populate it with a temporary stack
+ * vm_area_struct.  We don't have enough context at this point to set the
+ * stack flags, permissions, and offset, so we use temporary values.  We'll
+ * update them later in setup_arg_pages(). */
+int bprm_mm_init(struct linux_binprm *bprm)
+{
+	int err;
+	struct mm_struct *mm = NULL;
+	struct vm_area_struct *vma = NULL;
+
+	bprm->mm = mm = mm_alloc();
+	err = -ENOMEM;
+	if (!mm)
+		goto err;
+
+	if ((err = init_new_context(current, mm)))
+		goto err;
+
+#ifdef CONFIG_MMU
+	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!vma)
+		goto err;
+
+	down_write(&mm->mmap_sem);
+	{
+		vma->vm_mm = mm;
+
+		/* Place the stack at the top of user memory.  Later, we'll
+		 * move this to an appropriate place.  We don't use STACK_TOP
+		 * because that can depend on attributes which aren't
+		 * configured yet. */
+		vma->vm_end = TASK_SIZE;
+		vma->vm_start = vma->vm_end - PAGE_SIZE;
+
+		vma->vm_flags = VM_STACK_FLAGS;
+		vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+		if ((err = insert_vm_struct(mm, vma))) {
+			up_write(&mm->mmap_sem);
+			goto err;
+		}
+
+		mm->stack_vm = mm->total_vm = 1;
+	}
+	up_write(&mm->mmap_sem);
+
+	bprm->p = vma->vm_end - sizeof(void *);
+#else
+	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
+#endif
+
+	return 0;
+
+err:
+#ifdef CONFIG_MMU
+	if (vma) {
+		bprm->vma = NULL;
+		kmem_cache_free(vm_area_cachep, vma);
+	}
+#endif
+
+	if (mm) {
+		bprm->mm = NULL;
+		mmdrop(mm);
+	}
+
+	return err;
+}
+
+EXPORT_SYMBOL(bprm_mm_init);
+
 /*
  * count() counts the number of strings in array ARGV.
  */
@@ -198,15 +346,16 @@ static int count(char __user * __user * argv, int max)
 }
 
 /*
- * 'copy_strings()' copies argument/environment strings from user
- * memory to free pages in kernel mem. These are in a format ready
- * to be put directly into the top of new user memory.
+ * 'copy_strings()' copies argument/environment strings from the old
+ * process's memory to the new process's stack.  The call to get_user_pages()
+ * ensures the destination page is created and not swapped out.
  */
 static int copy_strings(int argc, char __user * __user * argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
 	char *kaddr = NULL;
+	unsigned long kpos = 0;
 	int ret;
 
 	while (argc-- > 0) {
@@ -215,69 +364,77 @@ static int copy_strings(int argc, char __user * __user * argv,
 		unsigned long pos;
 
 		if (get_user(str, argv+argc) ||
-				!(len = strnlen_user(str, bprm->p))) {
+				!(len = strnlen_user(str, MAX_ARG_STRLEN))) {
 			ret = -EFAULT;
 			goto out;
 		}
 
-		if (bprm->p < len)  {
+#ifdef CONFIG_MMU
+		if (MAX_ARG_STRLEN < len) {
 			ret = -E2BIG;
 			goto out;
 		}
+#else
+		if (bprm->p < len) {
+			ret = -E2BIG;
+			goto out;
+		}
+#endif
 
-		bprm->p -= len;
-		/* XXX: add architecture specific overflow check here. */
+		/* We're going to work our way backwards. */
 		pos = bprm->p;
+		str += len;
+		bprm->p -= len;
 
 		while (len > 0) {
-			int i, new, err;
 			int offset, bytes_to_copy;
-			struct page *page;
 
 			offset = pos % PAGE_SIZE;
-			i = pos/PAGE_SIZE;
-			page = bprm->page[i];
-			new = 0;
-			if (!page) {
-				page = alloc_page(GFP_HIGHUSER);
-				bprm->page[i] = page;
+			if (offset == 0)
+				offset = PAGE_SIZE;
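+			/* 'offset' is now the room left below 'pos' in its
+			 * page; copying at most that much keeps each pass
+			 * within a single page. */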
+
+			bytes_to_copy = offset;
+			if (bytes_to_copy > len)
+				bytes_to_copy = len;
+
+			offset -= bytes_to_copy;
+			pos -= bytes_to_copy;
+			str -= bytes_to_copy;
+			len -= bytes_to_copy;
+
+			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+				struct page *page;
+
+				page = get_arg_page(bprm, pos, 1);
 				if (!page) {
-					ret = -ENOMEM;
+					ret = -E2BIG;
 					goto out;
 				}
-				new = 1;
-			}
 
-			if (page != kmapped_page) {
-				if (kmapped_page)
+				if (kmapped_page) {
+					flush_kernel_dcache_page(kmapped_page);
 					kunmap(kmapped_page);
+					put_arg_page(kmapped_page);
+				}
 				kmapped_page = page;
 				kaddr = kmap(kmapped_page);
+				kpos = pos & PAGE_MASK;
+				flush_cache_page(bprm->vma, kpos,
+						 page_to_pfn(kmapped_page));
 			}
-			if (new && offset)
-				memset(kaddr, 0, offset);
-			bytes_to_copy = PAGE_SIZE - offset;
-			if (bytes_to_copy > len) {
-				bytes_to_copy = len;
-				if (new)
-					memset(kaddr+offset+len, 0,
-						PAGE_SIZE-offset-len);
-			}
-			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
-			if (err) {
+			if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
 				ret = -EFAULT;
 				goto out;
 			}
-
-			pos += bytes_to_copy;
-			str += bytes_to_copy;
-			len -= bytes_to_copy;
 		}
 	}
 	ret = 0;
 out:
-	if (kmapped_page)
+	if (kmapped_page) {
+		flush_kernel_dcache_page(kmapped_page);
 		kunmap(kmapped_page);
+		put_arg_page(kmapped_page);
+	}
 	return ret;
 }
 
@@ -297,154 +454,157 @@ int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
 EXPORT_SYMBOL(copy_strings_kernel);
 
 #ifdef CONFIG_MMU
-/*
- * This routine is used to map in a page into an address space: needed by
- * execve() for the initial stack and environment pages.
- *
- * vma->vm_mm->mmap_sem is held for writing.
- */
-void install_arg_page(struct vm_area_struct *vma,
-			struct page *page, unsigned long address)
+
+static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pte_t * pte;
-	spinlock_t *ptl;
+	unsigned long old_start = vma->vm_start;
+	unsigned long old_end = vma->vm_end;
+	unsigned long length = old_end - old_start;
+	unsigned long new_start = old_start + shift;
+	unsigned long new_end = old_end + shift;
+	struct mmu_gather *tlb;
+
+	BUG_ON(new_start > new_end);
+
+	if (new_start < old_start) {
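+	/* Shift the stack down: grow the vma downwards over the new
+	 * range, move the page tables into it, free the vacated top,
+	 * then trim the vma to its final extent. */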
+		if (vma != find_vma(mm, new_start))
+			return -EFAULT;
+
+		vma_adjust(vma, new_start, old_end,
+			   vma->vm_pgoff - (-shift >> PAGE_SHIFT), NULL);
+
+		if (length != move_page_tables(vma, old_start,
+					       vma, new_start, length))
+			return -ENOMEM;
+		
+		lru_add_drain();
+		tlb = tlb_gather_mmu(mm, 0);
+		free_pgd_range(&tlb, new_end, old_end, new_end,
+			       vma->vm_next ? vma->vm_next->vm_start : 0);
+		tlb_finish_mmu(tlb, new_end, old_end);
+
+		vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
+	} else {
+		struct vm_area_struct *tmp, *prev;
+		
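+		/* Shift the stack up (CONFIG_STACK_GROWSUP): check that the
+		 * target range is free, grow the vma upwards, move the page
+		 * tables up, then free the vacated bottom of the range. */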
+		tmp = find_vma_prev(mm, new_end, &prev);
+		if ((tmp && tmp->vm_start < new_end) || prev != vma)
+			return -EFAULT;
 
-	if (unlikely(anon_vma_prepare(vma)))
-		goto out;
+		find_vma_prev(mm, vma->vm_start, &prev);
 
-	flush_dcache_page(page);
-	pte = get_locked_pte(mm, address, &ptl);
-	if (!pte)
-		goto out;
-	if (!pte_none(*pte)) {
-		pte_unmap_unlock(pte, ptl);
-		goto out;
+		vma_adjust(vma, old_start, new_end, vma->vm_pgoff, NULL);
+
+		if (length != move_page_tables_up(vma, old_start,
+					       vma, new_start, length))
+			return -ENOMEM;
+
+		lru_add_drain();
+		tlb = tlb_gather_mmu(mm, 0);
+		free_pgd_range(&tlb, old_start, new_start, 
+			       prev ? prev->vm_end: 0, new_start);
+		tlb_finish_mmu(tlb, old_start, new_start);
+
+		vma_adjust(vma, new_start, new_end,
+			   vma->vm_pgoff + (shift >> PAGE_SHIFT), NULL);
 	}
-	inc_mm_counter(mm, anon_rss);
-	lru_cache_add_active(page);
-	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
-					page, vma->vm_page_prot))));
-	page_add_new_anon_rmap(page, vma, address);
-	pte_unmap_unlock(pte, ptl);
-
-	/* no need for flush_tlb */
-	return;
-out:
-	__free_page(page);
-	force_sig(SIGKILL, current);
+
+	return 0;
 }
 
 #define EXTRA_STACK_VM_PAGES	20	/* random */
 
+/* Finalizes the stack vm_area_struct.  The flags and permissions are updated,
+ * the stack is optionally relocated, and some extra space is added.
+ */
 int setup_arg_pages(struct linux_binprm *bprm,
 		    unsigned long stack_top,
 		    int executable_stack)
 {
-	unsigned long stack_base;
-	struct vm_area_struct *mpnt;
+	unsigned long ret;
+	unsigned long stack_base, stack_shift;
 	struct mm_struct *mm = current->mm;
-	int i, ret;
-	long arg_size;
+	struct vm_area_struct *vma = bprm->vma;
 
 #ifdef CONFIG_STACK_GROWSUP
-	/* Move the argument and environment strings to the bottom of the
-	 * stack space.
-	 */
-	int offset, j;
-	char *to, *from;
-
-	/* Start by shifting all the pages down */
-	i = 0;
-	for (j = 0; j < MAX_ARG_PAGES; j++) {
-		struct page *page = bprm->page[j];
-		if (!page)
-			continue;
-		bprm->page[i++] = page;
-	}
-
-	/* Now move them within their pages */
-	offset = bprm->p % PAGE_SIZE;
-	to = kmap(bprm->page[0]);
-	for (j = 1; j < i; j++) {
-		memmove(to, to + offset, PAGE_SIZE - offset);
-		from = kmap(bprm->page[j]);
-		memcpy(to + PAGE_SIZE - offset, from, offset);
-		kunmap(bprm->page[j - 1]);
-		to = from;
-	}
-	memmove(to, to + offset, PAGE_SIZE - offset);
-	kunmap(bprm->page[j - 1]);
-
 	/* Limit stack size to 1GB */
 	stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
 	if (stack_base > (1 << 30))
 		stack_base = 1 << 30;
 	stack_base = PAGE_ALIGN(stack_top - stack_base);
 
-	/* Adjust bprm->p to point to the end of the strings. */
-	bprm->p = stack_base + PAGE_SIZE * i - offset;
-
-	mm->arg_start = stack_base;
-	arg_size = i << PAGE_SHIFT;
+	/* Make sure we didn't let the argument array grow too large. */
+	if (vma->vm_end - vma->vm_start > STACK_TOP - stack_base)
+		return -ENOMEM;
 
-	/* zero pages that were copied above */
-	while (i < MAX_ARG_PAGES)
-		bprm->page[i++] = NULL;
+	stack_shift = stack_base - vma->vm_start;
+	mm->arg_start = bprm->p + stack_shift;
+	bprm->p = vma->vm_end + stack_shift;
 #else
-	stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE);
+	BUG_ON(stack_top > STACK_TOP);
+	BUG_ON(stack_top & ~PAGE_MASK);
+
+	stack_base = arch_align_stack(stack_top - mm->stack_vm*PAGE_SIZE);
 	stack_base = PAGE_ALIGN(stack_base);
-	bprm->p += stack_base;
+
+	/* Make sure we didn't let the argument array grow too large. */
+	if (stack_base > stack_top)
+		return -ENOMEM;
+
+	stack_shift = stack_base - (bprm->p & PAGE_MASK);
+	bprm->p += stack_shift;
 	mm->arg_start = bprm->p;
-	arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start);
 #endif
 
-	arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE;
-
 	if (bprm->loader)
-		bprm->loader += stack_base;
-	bprm->exec += stack_base;
-
-	mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!mpnt)
-		return -ENOMEM;
+		bprm->loader += stack_shift;
+	bprm->exec += stack_shift;
 
 	down_write(&mm->mmap_sem);
 	{
-		mpnt->vm_mm = mm;
-#ifdef CONFIG_STACK_GROWSUP
-		mpnt->vm_start = stack_base;
-		mpnt->vm_end = stack_base + arg_size;
-#else
-		mpnt->vm_end = stack_top;
-		mpnt->vm_start = mpnt->vm_end - arg_size;
-#endif
+		struct vm_area_struct *prev = NULL;
+		unsigned long vm_flags = vma->vm_flags;
+
 		/* Adjust stack execute permissions; explicitly enable
 		 * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
 		 * and leave alone (arch default) otherwise. */
 		if (unlikely(executable_stack == EXSTACK_ENABLE_X))
-			mpnt->vm_flags = VM_STACK_FLAGS |  VM_EXEC;
+			vm_flags |= VM_EXEC;
 		else if (executable_stack == EXSTACK_DISABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
-		else
-			mpnt->vm_flags = VM_STACK_FLAGS;
-		mpnt->vm_flags |= mm->def_flags;
-		mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
-		if ((ret = insert_vm_struct(mm, mpnt))) {
+			vm_flags &= ~VM_EXEC;
+		vm_flags |= mm->def_flags;
+
+		ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
+				vm_flags);
+		if (ret) {
 			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
 			return ret;
 		}
-		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
-	}
+		BUG_ON(prev != vma);
+
+		/* Move stack pages down in memory. */
+		if (stack_shift) {
+			ret = shift_arg_pages(vma, stack_shift);
+			if (ret) {
+				up_write(&mm->mmap_sem);
+				return ret;
+			}
+		}
 
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page *page = bprm->page[i];
-		if (page) {
-			bprm->page[i] = NULL;
-			install_arg_page(mpnt, page, stack_base);
+#ifdef CONFIG_STACK_GROWSUP
+		if (expand_stack(vma, vma->vm_end +
+					EXTRA_STACK_VM_PAGES * PAGE_SIZE)) {
+			up_write(&mm->mmap_sem);
+			return -EFAULT;
+		}
+#else
+		if (expand_stack(vma, stack_base -
+					EXTRA_STACK_VM_PAGES * PAGE_SIZE)) {
+			up_write(&mm->mmap_sem);
+			return -EFAULT;
 		}
-		stack_base += PAGE_SIZE;
+#endif
 	}
 	up_write(&mm->mmap_sem);
 	
@@ -453,21 +613,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 EXPORT_SYMBOL(setup_arg_pages);
 
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
-	int i;
-
-	for (i = 0; i < MAX_ARG_PAGES; i++) {
-		if (bprm->page[i])
-			__free_page(bprm->page[i]);
-		bprm->page[i] = NULL;
-	}
-}
-
 #endif /* CONFIG_MMU */
 
 struct file *open_exec(const char *name)
@@ -985,28 +1130,47 @@ void compute_creds(struct linux_binprm *bprm)
 
 EXPORT_SYMBOL(compute_creds);
 
-void remove_arg_zero(struct linux_binprm *bprm)
+/*
+ * Arguments are '\0' separated strings found at the location bprm->p
+ * points to; chop off the first one by relocating bprm->p to right after
+ * the first '\0' encountered.
+ */
+int remove_arg_zero(struct linux_binprm *bprm)
 {
-	if (bprm->argc) {
-		unsigned long offset;
-		char * kaddr;
-		struct page *page;
+	int ret = 0;
+	unsigned long offset;
+	char *kaddr;
+	struct page *page;
 
-		offset = bprm->p % PAGE_SIZE;
-		goto inside;
+	if (!bprm->argc)
+		return 0;
 
-		while (bprm->p++, *(kaddr+offset++)) {
-			if (offset != PAGE_SIZE)
-				continue;
-			offset = 0;
-			kunmap_atomic(kaddr, KM_USER0);
-inside:
-			page = bprm->page[bprm->p/PAGE_SIZE];
-			kaddr = kmap_atomic(page, KM_USER0);
+	do {
+		offset = bprm->p & ~PAGE_MASK;
+		page = get_arg_page(bprm, bprm->p, 0);
+		if (!page) {
+			ret = -EFAULT;
+			goto out;
 		}
+		kaddr = kmap_atomic(page, KM_USER0);
+
+		for (; offset < PAGE_SIZE && kaddr[offset];
+				offset++, bprm->p++)
+			;
+
 		kunmap_atomic(kaddr, KM_USER0);
-		bprm->argc--;
-	}
+		put_arg_page(page);
+
+		if (offset == PAGE_SIZE)
+			free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
+	} while (offset == PAGE_SIZE);
+
+	bprm->p++;
+	bprm->argc--;
+	ret = 0;
+
+out:
+	return ret;
 }
 
 EXPORT_SYMBOL(remove_arg_zero);
@@ -1033,7 +1197,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 		fput(bprm->file);
 		bprm->file = NULL;
 
-	        loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
+	        loader = bprm->vma->vm_end - sizeof(void *);
 
 		file = open_exec("/sbin/loader");
 		retval = PTR_ERR(file);
@@ -1125,8 +1289,8 @@ int do_execve(char * filename,
 {
 	struct linux_binprm *bprm;
 	struct file *file;
+	unsigned long tmp;
 	int retval;
-	int i;
 
 	retval = -ENOMEM;
 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1140,25 +1304,19 @@ int do_execve(char * filename,
 
 	sched_exec();
 
-	bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-
 	bprm->file = file;
 	bprm->filename = filename;
 	bprm->interp = filename;
-	bprm->mm = mm_alloc();
-	retval = -ENOMEM;
-	if (!bprm->mm)
-		goto out_file;
 
-	retval = init_new_context(current, bprm->mm);
-	if (retval < 0)
-		goto out_mm;
+	retval = bprm_mm_init(bprm);
+	if (retval)
+		goto out_file;
 
-	bprm->argc = count(argv, bprm->p / sizeof(void *));
+	bprm->argc = count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
 		goto out_mm;
 
-	bprm->envc = count(envp, bprm->p / sizeof(void *));
+	bprm->envc = count(envp, MAX_ARG_STRINGS);
 	if ((retval = bprm->envc) < 0)
 		goto out_mm;
 
@@ -1179,15 +1337,16 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	tmp = bprm->p;
 	retval = copy_strings(bprm->argc, argv, bprm);
 	if (retval < 0)
 		goto out;
+	bprm->argv_len = tmp - bprm->p;
 
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
-		free_arg_pages(bprm);
-
 		/* execve success */
+		free_arg_pages(bprm);
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
 		kfree(bprm);
@@ -1195,26 +1354,19 @@ int do_execve(char * filename,
 	}
 
 out:
-	/* Something went wrong, return the inode and free the argument pages*/
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page * page = bprm->page[i];
-		if (page)
-			__free_page(page);
-	}
-
+	free_arg_pages(bprm);
 	if (bprm->security)
 		security_bprm_free(bprm);
 
 out_mm:
 	if (bprm->mm)
-		mmdrop(bprm->mm);
+		mmput(bprm->mm);
 
 out_file:
 	if (bprm->file) {
 		allow_write_access(bprm->file);
 		fput(bprm->file);
 	}
-
 out_kfree:
 	kfree(bprm);
 
diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
index 595f1c3..869c236 100644
--- a/include/asm-um/processor-i386.h
+++ b/include/asm-um/processor-i386.h
@@ -67,9 +67,6 @@ static inline void rep_nop(void)
 #define current_text_addr() \
 	({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
 
-#define ARCH_IS_STACKGROW(address) \
-       (address + 32 >= UPT_SP(&current->thread.regs.regs))
-
 #define KSTK_EIP(tsk) KSTK_REG(tsk, EIP)
 #define KSTK_ESP(tsk) KSTK_REG(tsk, UESP)
 #define KSTK_EBP(tsk) KSTK_REG(tsk, EBP)
diff --git a/include/asm-um/processor-x86_64.h b/include/asm-um/processor-x86_64.h
index 31c2d4d..64d7bdd 100644
--- a/include/asm-um/processor-x86_64.h
+++ b/include/asm-um/processor-x86_64.h
@@ -44,9 +44,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
 #define current_text_addr() \
 	({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
 
-#define ARCH_IS_STACKGROW(address) \
-        (address + 128 >= UPT_SP(&current->thread.regs.regs))
-
 #define KSTK_EIP(tsk) KSTK_REG(tsk, RIP)
 #define KSTK_ESP(tsk) KSTK_REG(tsk, RSP)
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2d956cd..8ac2277 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -5,12 +5,9 @@
 
 struct pt_regs;
 
-/*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
- */
-#define MAX_ARG_PAGES 32
+/* FIXME: Find real limits, or none. */
+#define MAX_ARG_STRLEN (PAGE_SIZE * 32)
+#define MAX_ARG_STRINGS 0x7FFFFFFF
 
 /* sizeof(linux_binprm->buf) */
 #define BINPRM_BUF_SIZE 128
@@ -22,7 +19,12 @@ struct pt_regs;
  */
 struct linux_binprm{
 	char buf[BINPRM_BUF_SIZE];
+#ifdef CONFIG_MMU
+	struct vm_area_struct *vma;
+#else
+# define MAX_ARG_PAGES	32
 	struct page *page[MAX_ARG_PAGES];
+#endif
 	struct mm_struct *mm;
 	unsigned long p; /* current top of mem */
 	int sh_bang;
@@ -38,6 +40,7 @@ struct linux_binprm{
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+	unsigned long argv_len;
 };
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@@ -66,7 +69,7 @@ extern int register_binfmt(struct linux_binfmt *);
 extern int unregister_binfmt(struct linux_binfmt *);
 
 extern int prepare_binprm(struct linux_binprm *);
-extern void remove_arg_zero(struct linux_binprm *);
+extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 
@@ -83,6 +86,7 @@ extern int suid_dumpable;
 extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
+extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern void compute_creds(struct linux_binprm *binprm);
 extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60e0e4a..ceee062 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -771,7 +771,6 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
@@ -788,9 +787,18 @@ int FASTCALL(set_page_dirty(struct page *page));
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
+extern unsigned long move_page_tables(struct vm_area_struct *vma,
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len);
+extern unsigned long move_page_tables_up(struct vm_area_struct *vma,
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len);
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern int mprotect_fixup(struct vm_area_struct *vma,
+			  struct vm_area_struct **pprev, unsigned long start,
+			  unsigned long end, unsigned long newflags);
 
 /*
  * Prototype to add a shrinker callback for ageable caches.
@@ -1091,6 +1099,9 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 #ifdef CONFIG_IA64
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #endif
+#ifdef CONFIG_STACK_GROWSUP
+extern int expand_downwards(struct vm_area_struct *vma, unsigned long address);
+#endif
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 47f1c53..b97ca11 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -165,6 +165,7 @@ enum
 	KERN_MAX_LOCK_DEPTH=74,
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
+	KERN_AUDIT_ARGV=77, /* int: max size of argv array for audit logging */
 };
 
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 4e9d208..9b08f55 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,6 +170,22 @@ void audit_panic(const char *message)
 	}
 }
 
+void audit_kill(const char *message)
+{
+	switch (audit_failure)
+	{
+	case AUDIT_FAIL_SILENT:
+		break;
+	case AUDIT_FAIL_PRINTK:
+		printk(KERN_ERR "audit: %s\n", message);
+		break;
+	case AUDIT_FAIL_PANIC:
+		printk(KERN_ERR "audit: %s\n", message);
+		send_sig(SIGKILL, current, 0);
+		break;
+	}
+}
+
 static inline int audit_rate_check(void)
 {
 	static unsigned long	last_check = 0;
diff --git a/kernel/audit.h b/kernel/audit.h
index a337023..9cad5ce 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -116,6 +116,7 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
+extern void		    audit_kill(const char *message);
 
 struct audit_netlink_list {
 	int pid;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 628c7ac..402252e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -155,7 +155,7 @@ struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
 	int envc;
-	char mem[0];
+	struct mm_struct *mm;
 };
 
 struct audit_aux_data_socketcall {
@@ -795,6 +795,48 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 	audit_log_task_context(ab);
 }
 
+static void audit_log_execve_info(struct audit_buffer *ab,
+		struct audit_aux_data_execve *axi)
+{
+	int i;
+	long len;
+	const char __user *p = (const char __user *)axi->mm->arg_start;
+
+	if (axi->mm != current->mm)
+		return; /* execve failed, no additional info */
+
+	for (i = 0; i < axi->argc; i++, p += len) {
+		long ret;
+		char *tmp;
+
+		len = strnlen_user(p, MAX_ARG_STRLEN);
+		/*
+		 * We just created this mm, if we can't find the strings
+		 * we just copied in something is _very_ wrong.
+		 */
+		BUG_ON(!len);
+
+		tmp = kmalloc(len, GFP_KERNEL);
+		if (!tmp) {
+			audit_kill("out of memory for argv string,"
+					" terminating process");
+			break;
+		}
+
+		ret = copy_from_user(tmp, p, len);
+		/*
+		 * There is no reason for this copy to be short.
+		 */
+		BUG_ON(ret);
+
+		audit_log_format(ab, "a%d=", i);
+		audit_log_untrustedstring(ab, tmp);
+		audit_log_format(ab, "\n");
+
+		kfree(tmp);
+	}
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -935,13 +977,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
-			int i;
-			const char *p;
-			for (i = 0, p = axi->mem; i < axi->argc; i++) {
-				audit_log_format(ab, "a%d=", i);
-				p = audit_log_untrustedstring(ab, p);
-				audit_log_format(ab, "\n");
-			}
+			audit_log_execve_info(ab, axi);
 			break; }
 
 		case AUDIT_SOCKETCALL: {
@@ -1761,32 +1797,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode
 	return 0;
 }
 
+int audit_argv_kb = 32;
+
 int audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
-	unsigned long p, next;
-	void *to;
 
 	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
-	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
-				GFP_KERNEL);
+	/*
+	 * Even though the stack code doesn't limit the arg+env size any more,
+	 * the audit code requires that _all_ arguments be logged in a single
+	 * netlink skb. Hence cap it :-(
+	 */
+	if (bprm->argv_len > (audit_argv_kb << 10))
+		return -E2BIG;
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
 
 	ax->argc = bprm->argc;
 	ax->envc = bprm->envc;
-	for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
-		struct page *page = bprm->page[p / PAGE_SIZE];
-		void *kaddr = kmap(page);
-		next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
-		memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
-		to += next - p;
-		kunmap(page);
-	}
-
+	ax->mm = bprm->mm;
 	ax->d.type = AUDIT_EXECVE;
 	ax->d.next = context->aux;
 	context->aux = (void *)ax;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c904748..5dfd4d7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,7 @@ extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
+extern int audit_argv_kb;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -603,6 +604,16 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_AUDITSYSCALL
+	{
+		.ctl_name	= KERN_AUDIT_ARGV,
+		.procname	= "audit_argv_kb",
+		.data		= &audit_argv_kb,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 
 	{ .ctl_name = 0 }
 };
diff --git a/mm/mmap.c b/mm/mmap.c
index 88da687..8c6ceb5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1569,33 +1569,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
 
-#ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
-{
-	return expand_upwards(vma, address);
-}
-
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
-	struct vm_area_struct *vma, *prev;
-
-	addr &= PAGE_MASK;
-	vma = find_vma_prev(mm, addr, &prev);
-	if (vma && (vma->vm_start <= addr))
-		return vma;
-	if (!prev || expand_stack(prev, addr))
-		return NULL;
-	if (prev->vm_flags & VM_LOCKED) {
-		make_pages_present(addr, prev->vm_end);
-	}
-	return prev;
-}
-#else
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+#ifndef CONFIG_STACK_GROWSUP
+static inline
+#endif
+int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
 	int error;
 
@@ -1632,6 +1612,34 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
 	return error;
 }
 
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	return expand_upwards(vma, address);
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma, *prev;
+
+	addr &= PAGE_MASK;
+	vma = find_vma_prev(mm, addr, &prev);
+	if (vma && (vma->vm_start <= addr))
+		return vma;
+	if (!prev || expand_stack(prev, addr))
+		return NULL;
+	if (prev->vm_flags & VM_LOCKED) {
+		make_pages_present(addr, prev->vm_end);
+	}
+	return prev;
+}
+#else
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	return expand_downwards(vma, address);
+}
+
 struct vm_area_struct *
 find_extend_vma(struct mm_struct * mm, unsigned long addr)
 {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3b8f3c0..e8346c3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -128,7 +128,7 @@ static void change_protection(struct vm_area_struct *vma,
 	flush_tlb_range(vma, start, end);
 }
 
-static int
+int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long start, unsigned long end, unsigned long newflags)
 {
diff --git a/mm/mremap.c b/mm/mremap.c
index 5d4bd4f..858a36b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -118,9 +118,63 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		spin_unlock(&mapping->i_mmap_lock);
 }
 
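+/*
+ * Mirror image of move_ptes(): walk the ptes from the last page down
+ * to the first, so that an upward move of an overlapping range never
+ * overwrites entries that haven't been copied yet.
+ */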
+static void move_ptes_up(struct vm_area_struct *vma, pmd_t *old_pmd,
+		unsigned long old_addr, unsigned long old_end,
+		struct vm_area_struct *new_vma, pmd_t *new_pmd,
+		unsigned long new_addr)
+{
+	struct address_space *mapping = NULL;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *old_pte, *new_pte, pte;
+	spinlock_t *old_ptl, *new_ptl;
+	unsigned long new_end = new_addr + (old_end - old_addr);
+
+	if (vma->vm_file) {
+		/*
+		 * Subtle point from Rajesh Venkatasubramanian: before
+		 * moving file-based ptes, we must lock vmtruncate out,
+		 * since it might clean the dst vma before the src vma,
+		 * and we propagate stale pages into the dst afterward.
+		 */
+		mapping = vma->vm_file->f_mapping;
+		spin_lock(&mapping->i_mmap_lock);
+		if (new_vma->vm_truncate_count &&
+		    new_vma->vm_truncate_count != vma->vm_truncate_count)
+			new_vma->vm_truncate_count = 0;
+	}
+
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * pte locks because exclusive mmap_sem prevents deadlock.
+	 */
+	old_pte = pte_offset_map_lock(mm, old_pmd, old_end-1, &old_ptl);
+	new_pte = pte_offset_map_nested(new_pmd, new_end-1);
+	new_ptl = pte_lockptr(mm, new_pmd);
+	if (new_ptl != old_ptl)
+		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	arch_enter_lazy_mmu_mode();
+
+	for (; old_end > old_addr; old_pte--, old_end -= PAGE_SIZE,
+				   new_pte--, new_end -= PAGE_SIZE) {
+		if (pte_none(*old_pte))
+			continue;
+		pte = ptep_clear_flush(vma, old_end-1, old_pte);
+		pte = move_pte(pte, new_vma->vm_page_prot, old_end-1, new_end-1);
+		set_pte_at(mm, new_end-1, new_pte, pte);
+	}
+
+	arch_leave_lazy_mmu_mode();
+	if (new_ptl != old_ptl)
+		spin_unlock(new_ptl);
+	pte_unmap_nested(new_pte - 1);
+	pte_unmap_unlock(old_pte - 1, old_ptl);
+	if (mapping)
+		spin_unlock(&mapping->i_mmap_lock);
+}
+
 #define LATENCY_LIMIT	(64 * PAGE_SIZE)
 
-static unsigned long move_page_tables(struct vm_area_struct *vma,
+unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len)
 {
@@ -132,21 +186,25 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
 
 	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
 		cond_resched();
+
 		next = (old_addr + PMD_SIZE) & PMD_MASK;
 		if (next - 1 > old_end)
 			next = old_end;
 		extent = next - old_addr;
+
 		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
 		if (!old_pmd)
 			continue;
 		new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
 		if (!new_pmd)
 			break;
+
 		next = (new_addr + PMD_SIZE) & PMD_MASK;
 		if (extent > next - new_addr)
 			extent = next - new_addr;
 		if (extent > LATENCY_LIMIT)
 			extent = LATENCY_LIMIT;
+
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
 				new_vma, new_pmd, new_addr);
 	}
@@ -154,6 +212,51 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
 	return len + old_addr - old_end;	/* how much done */
 }
 
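+/*
+ * Like move_page_tables(), but works from the top of the range down,
+ * so a region can be shifted to a higher, possibly overlapping,
+ * address.
+ */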
+unsigned long move_page_tables_up(struct vm_area_struct *vma,
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len)
+{
+	unsigned long extent, prev, old_end, new_end;
+	pmd_t *old_pmd, *new_pmd;
+
+	old_end = old_addr + len;
+	new_end = new_addr + len;
+	flush_cache_range(vma, old_addr, old_end);
+
+	for (; old_end > old_addr; old_end -= extent, new_end -= extent) {
+		cond_resched();
+
+		/*
+		 * calculate how far till prev PMD boundary for old
+		 */
+		prev = (old_end - 1) & PMD_MASK;
+		if (prev < old_addr)
+			prev = old_addr;
+		extent = old_end - prev;
+
+		old_pmd = get_old_pmd(vma->vm_mm, old_end-1);
+		if (!old_pmd)
+			continue;
+		new_pmd = alloc_new_pmd(vma->vm_mm, new_end-1);
+		if (!new_pmd)
+			break;
+
+		/*
+		 * calculate and clip to prev PMD boundary for new
+		 */
+		prev = (new_end - 1) & PMD_MASK;
+		if (extent > new_end - prev)
+			extent = new_end - prev;
+		if (extent > LATENCY_LIMIT)
+			extent = LATENCY_LIMIT;
+
+		move_ptes_up(vma, old_pmd, old_end - extent, old_end,
+				new_vma, new_pmd, new_end - extent);
+	}
+
+	return old_addr + len - old_end;
+}
+
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr)
