Re: 2.6.19 file content corruption on ext3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Btw, 
 much cleaned-up page tracing patch here, in case anybody cares (and 
"test.c" attached, although I don't think it changed since last time). 

The test.c output is a bit hard to read at times, since it will give 
offsets in bytes as hex (ie "00a77664" means page frame 00000a77, and byte 
664h within that page), while the kernel output is obvioiusly the page 
indexes (but the page fault _addresses_ can contain information about the 
exact byte in a page, so you can match them up when some kernel event is 
related to a page fault).

So both forms are necessary/logical, but it means that to match things up, 
you often need to ignore the last three hex digits of the address that 
"test.c" outputs.

This one also adds traces for the tags and the writeback activity, but 
since I'm going out for birthday dinner, I won't have time to try to 
actually analyse the trace I have.. Which is why I'm sending it out, in 
the hope that somebody else is working on this corruption issue and is 
interested..

		Linus

----
diff --git a/fs/buffer.c b/fs/buffer.c
index 263f88e..f5e132a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -722,6 +722,7 @@ int __set_page_dirty_buffers(struct page *page)
 			set_buffer_dirty(bh);
 			bh = bh->b_this_page;
 		} while (bh != head);
+		PAGE_TRACE(page, "dirtied buffers");
 	}
 	spin_unlock(&mapping->private_lock);
 
@@ -734,6 +735,7 @@ int __set_page_dirty_buffers(struct page *page)
 			__inc_zone_page_state(page, NR_FILE_DIRTY);
 			task_io_account_write(PAGE_CACHE_SIZE);
 		}
+		PAGE_TRACE(page, "setting TAG_DIRTY");
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 350878a..0cf3dce 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -91,6 +91,14 @@
 #define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
+#define SetPageInteresting(page) set_bit(PG_arch_1, &(page)->flags)
+#define PageInteresting(page)	test_bit(PG_arch_1, &(page)->flags)
+
+#define PAGE_TRACE(page, msg, arg...) do {	 				\
+	if (PageInteresting(page))	 					\
+		printk(KERN_DEBUG "PG %08lx: %s:%d " msg "\n", 			\
+			(page)->index, __FILE__, __LINE__ ,##arg );		\
+} while (0)
 
 #if (BITS_PER_LONG > 32)
 /*
@@ -183,32 +191,38 @@ static inline void SetPageUptodate(struct page *page)
 #define PageWriteback(page)	test_bit(PG_writeback, &(page)->flags)
 #define SetPageWriteback(page)						\
 	do {								\
-		if (!test_and_set_bit(PG_writeback,			\
-				&(page)->flags))			\
+		if (!test_and_set_bit(PG_writeback, &(page)->flags)) {	\
+			PAGE_TRACE(page, "set writeback");		\
 			inc_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 	} while (0)
 #define TestSetPageWriteback(page)					\
 	({								\
 		int ret;						\
 		ret = test_and_set_bit(PG_writeback,			\
 					&(page)->flags);		\
-		if (!ret)						\
+		if (!ret) {						\
+			PAGE_TRACE(page, "set writeback");		\
 			inc_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 		ret;							\
 	})
 #define ClearPageWriteback(page)					\
 	do {								\
-		if (test_and_clear_bit(PG_writeback,			\
-				&(page)->flags))			\
+		if (test_and_clear_bit(PG_writeback, &(page)->flags)) {	\
+			PAGE_TRACE(page, "end writeback");		\
 			dec_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 	} while (0)
 #define TestClearPageWriteback(page)					\
 	({								\
 		int ret;						\
 		ret = test_and_clear_bit(PG_writeback,			\
 				&(page)->flags);			\
-		if (ret)						\
+		if (ret) {						\
+			PAGE_TRACE(page, "end writeback");		\
 			dec_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 		ret;							\
 	})
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5c26818..7735b83 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -79,7 +79,7 @@ config DEBUG_KERNEL
 
 config LOG_BUF_SHIFT
 	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
-	range 12 21
+	range 12 24
 	default 17 if S390 || LOCKDEP
 	default 16 if X86_NUMAQ || IA64
 	default 15 if SMP
diff --git a/mm/filemap.c b/mm/filemap.c
index 8332c77..4df7d35 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -116,6 +116,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	PAGE_TRACE(page, "Removing page cache");
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -421,6 +422,23 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	return err;
 }
 
+static noinline int is_interesting(struct address_space *mapping)
+{
+	struct inode *inode = mapping->host;
+	struct dentry *dentry;
+	int retval = 0;
+
+	spin_lock(&dcache_lock);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+		if (strcmp(dentry->d_name.name, "mapfile"))
+			continue;
+		retval = 1;
+		break;
+	}
+	spin_unlock(&dcache_lock);
+	return retval;
+}
+
 /**
  * add_to_page_cache - add newly allocated pagecache pages
  * @page:	page to add
@@ -439,6 +457,9 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 {
 	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
+	if (is_interesting(mapping))
+		SetPageInteresting(page);
+
 	if (error == 0) {
 		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
diff --git a/mm/memory.c b/mm/memory.c
index 563792f..20af32f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -667,6 +667,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			if (unlikely(!page))
 				continue;
+			PAGE_TRACE(page, "unmapped at %08lx", addr);
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
 						addr) != page->index)
@@ -1605,6 +1606,7 @@ gotten:
 		 */
 		ptep_clear_flush(vma, address, page_table);
 		set_pte_at(mm, address, page_table, entry);
+		PAGE_TRACE(new_page, "write fault at %08lx", address);
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
@@ -2249,6 +2251,7 @@ retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		PAGE_TRACE(new_page, "mapping at %08lx (%s)", address, write_access ? "write" : "read");
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b3a198c..15f3aaf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -773,6 +773,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 				__inc_zone_page_state(page, NR_FILE_DIRTY);
 				task_io_account_write(PAGE_CACHE_SIZE);
 			}
+			PAGE_TRACE(page, "setting TAG_DIRTY");
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
@@ -813,6 +814,7 @@ int fastcall set_page_dirty(struct page *page)
 		if (!spd)
 			spd = __set_page_dirty_buffers;
 #endif
+		PAGE_TRACE(page, "setting dirty");
 		return (*spd)(page);
 	}
 	if (!PageDirty(page)) {
@@ -867,6 +869,7 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (TestClearPageDirty(page)) {
 		if (mapping_cap_account_dirty(mapping)) {
+			PAGE_TRACE(page, "clean_for_io");
 			page_mkclean(page);
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 		}
@@ -886,10 +889,12 @@ int test_clear_page_writeback(struct page *page)
 
 		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
-		if (ret)
+		if (ret) {
+			PAGE_TRACE(page, "clearing TAG_WRITEBACK");
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
+		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
@@ -907,14 +912,18 @@ int test_set_page_writeback(struct page *page)
 
 		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
-		if (!ret)
+		if (!ret) {
+			PAGE_TRACE(page, "setting TAG_WRITEBACK");
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-		if (!PageDirty(page))
+		}
+		if (!PageDirty(page)) {
+			PAGE_TRACE(page, "clearing TAG_DIRTY");
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index 57306fa..e6b4676 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -448,6 +448,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (pte_dirty(*pte) || pte_write(*pte)) {
 		pte_t entry;
 
+		PAGE_TRACE(page, "cleaning PTE %08lx", address);
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		entry = ptep_clear_flush(vma, address, pte);
 		entry = pte_wrprotect(entry);
@@ -637,6 +638,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		goto out_unmap;
 	}
 
+	PAGE_TRACE(page, "unmapping from %08lx", address);
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
@@ -767,6 +769,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
+		PAGE_TRACE(page, "unmapping from %08lx", address);
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>

#define TARGETSIZE (22 << 20)
#define CHUNKSIZE (1460)
#define NRCHUNKS (TARGETSIZE / CHUNKSIZE)
#define SIZE (NRCHUNKS * CHUNKSIZE)

static void fillmem(void *start, int nr)
{
	memset(start, nr, CHUNKSIZE);
}

#define page_offset(buf, off) (unsigned)((unsigned long)(buf)+(off)-(unsigned long)(mapping))

static int chunkorder[NRCHUNKS];
static char *mapping;

static int order(int nr)
{
	int i;
	if (nr < 0 || nr >= NRCHUNKS)
		return -1;
	for (i = 0; i < NRCHUNKS; i++)
		if (chunkorder[i] == nr)
			return i;
	return -2;
}

static void checkmem(void *buf, int nr)
{
	unsigned int start = ~0u, end = 0;
	unsigned char c = nr, *p = buf, differs = 0;
	int i;
	for (i = 0; i < CHUNKSIZE; i++) {
		unsigned char got = *p++;
		if (got != c) {
			if (i < start)
				start = i;
			if (i > end)
				end = i;
			differs = got;
		}
	}
	if (start < end) {
		printf("Chunk %d corrupted (%u-%u)  (%x-%x)            \n", nr, start, end,
			page_offset(buf, start), page_offset(buf, end));
		printf("Expected %u, got %u\n", c, differs);
		printf("Written as (%d)%d(%d)\n", order(nr-1), order(nr), order(nr+1));
	}
}

static char *remap(int fd, char *mapping)
{
	if (mapping) {
		munmap(mapping, SIZE);
		posix_fadvise(fd, 0, SIZE, POSIX_FADV_DONTNEED);
	}
	return mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}

int main(int argc, char **argv)
{
	int fd, i;

	/*
	 * Make some random ordering of writing the chunks to the
	 * memory map..
	 *
	 * Start with fully ordered..
	 */
	for (i = 0; i < NRCHUNKS; i++)
		chunkorder[i] = i;

	/* ..and then mix it up randomly */
	srandom(time(NULL));
	for (i = 0; i < NRCHUNKS; i++) {
		int index = (unsigned int) random() % NRCHUNKS;
		int nr = chunkorder[index];
		chunkorder[index] = chunkorder[i];
		chunkorder[i] = nr;
	}

	fd = open("mapfile", O_RDWR | O_TRUNC | O_CREAT, 0666);
	if (fd < 0)
		return -1;
	if (ftruncate(fd, SIZE) < 0)
		return -1;
	mapping = remap(fd, NULL);
	if (-1 == (int)(long)mapping)
		return -1;

	for (i = 0; i < NRCHUNKS; i++) {
		int chunk = chunkorder[i];
		printf("Writing chunk %d/%d (%d%%) (%08x)     \r",
			chunk, NRCHUNKS,
			100*i/NRCHUNKS,
			page_offset(mapping, chunk * CHUNKSIZE));
		fillmem(mapping + chunk * CHUNKSIZE, chunk);
	}
	printf("\n");

	/* Unmap, drop, and remap.. */
	mapping = remap(fd, mapping);

	/* .. and check */
	for (i = 0; i < NRCHUNKS; i++) {
		int chunk = i;
		printf("Checking chunk %d/%d (%d%%) (%08x)    \r",
			i, NRCHUNKS,
			100*i/NRCHUNKS,
			page_offset(mapping, i * CHUNKSIZE));
		checkmem(mapping + chunk * CHUNKSIZE, chunk);
	}
	printf("\n");

	/* Clean up for next time */
	sleep(5);
	sync();
	sleep(5);
	munmap(mapping, SIZE);
	close(fd);
	unlink("mapfile");
	
	return 0;
}

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux