Re: ext3_ordered_writepage() questions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Stephen,

Now that we got your attention, I am wondering whats your opinion on
this ?

I have a patch which eliminates adding buffers to the journal, if
we are doing just re-write of the disk block. In theory, it should
be fine - but it does change the current behavior for order mode
writes. I guess, current code adds the buffers to the journal, so
any metadata updates to any file in the filesystem happen in the 
journal - guarantees our buffers to be flushed out before that
transaction completes.

My patch *breaks* that guarantee. But provides significant improvement
for re-write case. My micro benchmark shows:

     2.6.16-rc6      2.6.16-rc6+patch
real  0m6.606s        0m3.705s 
user  0m0.124s        0m0.108s
sys   0m6.456s        0m3.600s


In real world, does this ordering guarantee matter ? Waiting for your
advise.

Thanks,
Badari

Make use of PageMappedToDisk(page) to find out if we need to
block allocation and skip the calls to it, if not needed.
When we are not doing block allocation, also avoid calls
to journal start and adding buffers to transaction.

Signed-off-by: Badari Pulavarty <[email protected]>
Index: linux-2.6.16-rc6/fs/buffer.c
===================================================================
--- linux-2.6.16-rc6.orig/fs/buffer.c	2006-03-11 14:12:55.000000000 -0800
+++ linux-2.6.16-rc6/fs/buffer.c	2006-03-16 08:22:37.000000000 -0800
@@ -2029,6 +2029,7 @@ static int __block_commit_write(struct i
 	int partial = 0;
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
+	int fullymapped = 1;
 
 	blocksize = 1 << inode->i_blkbits;
 
@@ -2043,6 +2044,8 @@ static int __block_commit_write(struct i
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
 		}
+		if (!buffer_mapped(bh))
+			fullymapped = 0;
 	}
 
 	/*
@@ -2053,6 +2056,9 @@ static int __block_commit_write(struct i
 	 */
 	if (!partial)
 		SetPageUptodate(page);
+
+	if (fullymapped)
+		SetPageMappedToDisk(page);
 	return 0;
 }
 
Index: linux-2.6.16-rc6/fs/ext3/inode.c
===================================================================
--- linux-2.6.16-rc6.orig/fs/ext3/inode.c	2006-03-11 14:12:55.000000000 -0800
+++ linux-2.6.16-rc6/fs/ext3/inode.c	2006-03-15 13:30:04.000000000 -0800
@@ -999,6 +999,12 @@ static int ext3_prepare_write(struct fil
 	handle_t *handle;
 	int retries = 0;
 
+	/*
+ 	 * If the page is already mapped to disk and we are not
+	 * journalling the data - there is nothing to do.
+	 */
+	if (PageMappedToDisk(page) && !ext3_should_journal_data(inode))
+		return 0;
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
@@ -1059,8 +1065,14 @@ static int ext3_ordered_commit_write(str
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 
-	ret = walk_page_buffers(handle, page_buffers(page),
-		from, to, NULL, ext3_journal_dirty_data);
+	/*
+	 * If the page is already mapped to disk, we won't have
+	 * a handle - which means no metadata updates are needed.
+	 * So, no need to add buffers to the transaction.
+	 */
+	if (handle)
+		ret = walk_page_buffers(handle, page_buffers(page),
+			from, to, NULL, ext3_journal_dirty_data);
 
 	if (ret == 0) {
 		/*
@@ -1075,9 +1087,11 @@ static int ext3_ordered_commit_write(str
 			EXT3_I(inode)->i_disksize = new_i_size;
 		ret = generic_commit_write(file, page, from, to);
 	}
-	ret2 = ext3_journal_stop(handle);
-	if (!ret)
-		ret = ret2;
+	if (handle) {
+		ret2 = ext3_journal_stop(handle);
+		if (!ret)
+			ret = ret2;
+	}
 	return ret;
 }
 
@@ -1098,9 +1112,11 @@ static int ext3_writeback_commit_write(s
 	else
 		ret = generic_commit_write(file, page, from, to);
 
-	ret2 = ext3_journal_stop(handle);
-	if (!ret)
-		ret = ret2;
+	if (handle) {
+		ret2 = ext3_journal_stop(handle);
+		if (!ret)
+			ret = ret2;
+	}
 	return ret;
 }
 
@@ -1278,6 +1294,14 @@ static int ext3_ordered_writepage(struct
 	if (ext3_journal_current_handle())
 		goto out_fail;
 
+	/*
+	 * If the page is mapped to disk, just do the IO
+	 */
+	if (PageMappedToDisk(page)) {
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+		goto out;
+	}
+
 	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
 
 	if (IS_ERR(handle)) {
@@ -1318,6 +1342,7 @@ static int ext3_ordered_writepage(struct
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
+out:
 	return ret;
 
 out_fail:
@@ -1337,10 +1362,13 @@ static int ext3_writeback_writepage(stru
 	if (ext3_journal_current_handle())
 		goto out_fail;
 
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out_fail;
+	if (!PageMappedToDisk(page)) {
+		handle = ext3_journal_start(inode,
+				ext3_writepage_trans_blocks(inode));
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out_fail;
+		}
 	}
 
 	if (test_opt(inode->i_sb, NOBH))
@@ -1348,9 +1376,11 @@ static int ext3_writeback_writepage(stru
 	else
 		ret = block_write_full_page(page, ext3_get_block, wbc);
 
-	err = ext3_journal_stop(handle);
-	if (!ret)
-		ret = err;
+	if (handle) {
+		err = ext3_journal_stop(handle);
+		if (!ret)
+			ret = err;
+	}
 	return ret;
 
 out_fail:


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux