[PATCH][Fix] Fix Bug #4959 (take 2)

Hi,

The following patch fixes Bug #4959.  To this end, it creates temporary
page translation tables containing the kernel mapping (reused from the original
tables) and the direct mapping (created from scratch), and makes swsusp switch
to these tables right before the image is restored.

The code used to generate the direct mapping comes from arch/x86_64/mm/init.c.

Please consider applying it.

Greetings,
Rafael

PS
As you can see, I would really like this bug to be fixed officially, as swsusp is
hardly usable on x86-64 with it, and I know of some people using swsusp on this
architecture.  I just don't want them to be hurt by this bug.

NOTES:
(1) I'm quite sure that to fix the problem we need to use temporary page
translation tables that won't be modified in the process of copying the image.
(2) These page translation tables have to be present in memory before the
image is copied, so there are two possible ways in which they can be created:
	(a) in the startup kernel code that is executed before calling swsusp
	on resume, in which case they have to be marked with PG_nosave,
	(b) in swsusp, after the image has been loaded from disk (to set up
	the tables we need to know which pages will be overwritten while
	copying the image).
However, (a) is tricky, because it will only work if the tables are always located
at the same physical addresses, which I think would be quite difficult to achieve.
Moreover, such code would have to be executed on every boot, and the
temporary page tables would always be present in memory.
For this reason I decided to choose (b).
(3) To create the temporary page translation tables in swsusp I need to allocate
some memory pages, and theoretically this operation could fail.  Fortunately,
the probability of that is extremely small, because the number of pages needed
is low (4 pages for a machine with 2GB of RAM, see the worked example below) and
we reserve 512 spare pages on suspend (in principle they are reserved for I/O, but
they should be available at the time the temporary page tables are created).
Thus I think the memory allocations here are completely safe.
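
For reference, here is a minimal user-space sketch of that page-count estimate
for a hypothetical 2GB box.  The x86-64 constants (PAGE_SHIFT = 12,
PUD_SHIFT = 30, PTRS_PER_PUD = 512) are hard-coded here for illustration only;
the arithmetic mirrors what swsusp_arch_resume() does in the patch below:

#include <stdio.h>

int main(void)
{
	unsigned long end_pfn = (2UL << 30) >> 12;	/* 2GB of RAM in 4KB pages */
	unsigned long n;

	/* one PMD page per 1GB of direct mapping (each PUD entry covers 1GB) */
	n = ((end_pfn << 12) + (1UL << 30) - 1) >> 30;
	/* plus the PUD pages that hold those entries, plus one page for the PGD */
	n += n / 512 + !!(n % 512) + 1;

	printf("pages needed = %lu\n", n);	/* prints 4 */
	return 0;
}

So for 2GB we need 2 PMD pages, 1 PUD page and 1 PGD page, which is well within
the 512 spare pages reserved on suspend.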


Signed-off-by: Rafael J. Wysocki <[email protected]>

Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend.c	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend.c	2005-09-27 07:41:04.000000000 +0200
@@ -11,6 +11,17 @@
 #include <linux/smp.h>
 #include <linux/suspend.h>
 #include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* Defined in kernel/power/swsusp.c */
+extern unsigned long get_usable_page(unsigned gfp_mask);
+extern void free_eaten_memory(void);
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+int restore_image(void);
+
+pgd_t *temp_level4_pgt;
 
 struct saved_context saved_context;
 
@@ -140,4 +151,128 @@
 
 }
 
+static void **pages;
+
+static inline void *__add_page(void)
+{
+	void **c;
+
+	c = (void **)get_usable_page(GFP_ATOMIC);
+	if (c) {
+		*c = pages;
+		pages = c;
+	}
+	return c;
+}
+
+static inline void *__next_page(void)
+{
+	void **c;
+
+	c = pages;
+	if (c) {
+		pages = *c;
+		*c = NULL;
+	}
+	return c;
+}
+
+/*
+ * Try to allocate as many usable pages as needed and daisy chain them.
+ * If one allocation fails, free the pages allocated so far
+ */
+static int alloc_usable_pages(unsigned long n)
+{
+	void *p;
+
+	pages = NULL;
+	do
+		if (!__add_page())
+			break;
+	while (--n);
+	if (n) {
+		p = __next_page();
+		while (p) {
+			free_page((unsigned long)p);
+			p = __next_page();
+		}
+		return -ENOMEM;
+	}
+	return 0;
+}
 
+static void phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+	long i, j;
+
+	i = pud_index(address);
+	pud = pud + i;
+	for (; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr;
+		pmd_t *pmd;
+
+		paddr = address + i*PUD_SIZE;
+		if (paddr >= end) {
+			for (; i < PTRS_PER_PUD; i++, pud++)
+				set_pud(pud, __pud(0));
+			break;
+		}
+
+		pmd = (pmd_t *)__next_page();
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+			unsigned long pe;
+
+			if (paddr >= end) {
+				for (; j < PTRS_PER_PMD; j++, pmd++)
+					set_pmd(pmd,  __pmd(0));
+				break;
+			}
+			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
+			pe &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(pe));
+		}
+	}
+}
+
+static void set_up_temporary_mappings(void)
+{
+	unsigned long start, end, next;
+
+	temp_level4_pgt = (pgd_t *)__next_page();
+
+	/* It is safe to reuse the original kernel mapping */
+	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+	/* Set up the direct mapping from scratch */
+	start = (unsigned long)pfn_to_kaddr(0);
+	end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+	for (; start < end; start = next) {
+		pud_t *pud = (pud_t *)__next_page();
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+		phys_pud_init(pud, __pa(start), __pa(next));
+		set_pgd(temp_level4_pgt + pgd_index(start),
+			mk_kernel_pgd(__pa(pud)));
+	}
+}
+
+int swsusp_arch_resume(void)
+{
+	unsigned long n;
+
+	n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT;
+	n += n / PTRS_PER_PUD + !!(n % PTRS_PER_PUD) + 1;
+	pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n);
+	if (alloc_usable_pages(n)) {
+		free_eaten_memory();
+		return -ENOMEM;
+	}
+	/* We have enough memory and from now on we cannot recover */
+	set_up_temporary_mappings();
+	restore_image();
+	return 0;
+}
Index: linux-2.6.14-rc2-git6/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/kernel/power/swsusp.c	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/kernel/power/swsusp.c	2005-09-26 22:13:21.000000000 +0200
@@ -1095,7 +1095,7 @@
 	*eaten_memory = c;
 }
 
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
 {
 	unsigned long m;
 
@@ -1109,7 +1109,7 @@
 	return m;
 }
 
-static void free_eaten_memory(void)
+void free_eaten_memory(void)
 {
 	unsigned long m;
 	void **c;
@@ -1481,11 +1481,12 @@
 	/* Allocate memory for the image and read the data from swap */
 
 	error = check_pagedir(pagedir_nosave);
-	free_eaten_memory();
+
 	if (!error)
 		error = data_read(pagedir_nosave);
 
 	if (error) { /* We fail cleanly */
+		free_eaten_memory();
 		for_each_pbe (p, pagedir_nosave)
 			if (p->address) {
 				free_page(p->address);
Index: linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/x86_64/kernel/suspend_asm.S	2005-09-26 21:05:13.000000000 +0200
+++ linux-2.6.14-rc2-git6/arch/x86_64/kernel/suspend_asm.S	2005-09-27 00:33:35.000000000 +0200
@@ -39,12 +39,13 @@
 	call swsusp_save
 	ret
 
-ENTRY(swsusp_arch_resume)
-	/* set up cr3 */	
-	leaq	init_level4_pgt(%rip),%rax
-	subq	$__START_KERNEL_map,%rax
-	movq	%rax,%cr3
-
+ENTRY(restore_image)
+	/* switch to temporary page tables */
+	movq	$__PAGE_OFFSET, %rdx
+	movq	temp_level4_pgt(%rip), %rax
+	subq	%rdx, %rax
+	movq	%rax, %cr3
+	/* Flush TLB */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
 	andq	$~(1<<7), %rdx	# PGE
@@ -69,6 +70,10 @@
 	movq	pbe_next(%rdx), %rdx
 	jmp	loop
 done:
+	/* go back to the original page tables */
+	leaq	init_level4_pgt(%rip), %rax
+	subq	$__START_KERNEL_map, %rax
+	movq	%rax, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movq	mmu_cr4_features(%rip), %rax
 	movq	%rax, %rdx
