REISER4: tracking down opps in reiser4_do_readpage_extent

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I've been trying to narrow down the possible sources of oppses in reiser4_do_readpage_extent (vs-1426, nikita-2688) patch by patch. Reverting reiser4-use-generic-file-read.patch & friends didn't have any effect, downgrading kernel from 2.6.21-rc5 to 2.6.20 too. Eventually it all came down to 0046-reiser4-drop-unused-semaphores.patch. I don't have a reliable way to trigger the bug, but one of my vmware sandboxes with reiser4 root always oppses when I do emerge binutils right after boot. With this patch reverted it no longer happens. **

You can find original patch at:
http://laurent.riffard.free.fr/reiser4/reiser4-for-2.6.21-rc1/broken-out/

I attached patch that adds mutex_write back to reiser4_inode, but as real mutex now instead of semaphore. Cryptcompress rw semaphore is not added back. Updated reiser4-fix-write_extent.patch with proper identation is also attached.

Perhaps this is not a proper fix and only hides real culprit, but at least I do not see oppses now. I do not yet know if this also solves problem with zeroed out files.

   Max
diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/inode.h linux-2.6.20.4/fs/reiser4/inode.h
--- linux-2.6.21-rc4-git7/fs/reiser4/inode.h	2007-03-23 14:40:53.445156464 +0300
+++ linux-2.6.20.4/fs/reiser4/inode.h	2007-04-06 04:02:40.000000000 +0400
@@ -136,6 +136,17 @@
 		cryptcompress_info_t cryptcompress_info;
 	} file_plugin_data;
 
+ 	/* this semaphore is used to serialize writes of any file plugin,
+	 * and should be invariant during file plugin conversion (which
+	 * is going in the context of ->write()).
+ 	 * inode->i_mutex can not be used for the serialization, because
+ 	 * write_unix_file uses get_user_pages which is to be used under
+ 	 * mm->mmap_sem and because it is required to take mm->mmap_sem before
+ 	 * inode->i_mutex, so inode->i_mutex would have to be up()-ed before
+ 	 * calling to get_user_pages which is unacceptable.
+	 */
+ 	struct mutex mutex_write;
+
 	/* this semaphore is to serialize readers and writers of @pset->file
 	 * when file plugin conversion is enabled
 	 */
diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/plugin/file/file.c linux-2.6.20.4/fs/reiser4/plugin/file/file.c
--- linux-2.6.21-rc4-git7/fs/reiser4/plugin/file/file.c	2007-03-24 01:14:45.000000000 +0300
+++ linux-2.6.20.4/fs/reiser4/plugin/file/file.c	2007-04-06 04:02:40.000000000 +0400
@@ -1937,6 +1933,7 @@
 
 	uf_info = unix_file_inode_data(inode);
 
+	mutex_lock(&reiser4_inode_data(inode)->mutex_write);
 	get_exclusive_access(uf_info);
 
 	if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
@@ -1948,6 +1945,7 @@
 		result = find_file_state(inode, uf_info);
 		if (result != 0) {
 			drop_exclusive_access(uf_info);
+			mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 			reiser4_exit_context(ctx);
 			return result;
 		}
@@ -1963,6 +1961,7 @@
 			result = check_pages_unix_file(file, inode);
 			if (result) {
 				drop_exclusive_access(uf_info);
+				mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 				reiser4_exit_context(ctx);
 				return result;
 			}
@@ -1977,6 +1976,7 @@
 	result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
 	if (result) {
 		drop_exclusive_access(uf_info);
+		mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 		reiser4_exit_context(ctx);
 		return result;
 	}
@@ -1988,6 +1988,7 @@
 	}
 
 	drop_exclusive_access(uf_info);
+	mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 	reiser4_exit_context(ctx);
 	return result;
 }
@@ -2369,6 +2370,7 @@
 	if (in_reiser4 == 0) {
 		uf_info = unix_file_inode_data(inode);
 
+		mutex_lock(&reiser4_inode_data(inode)->mutex_write);
 		get_exclusive_access(uf_info);
 		if (atomic_read(&file->f_dentry->d_count) == 1 &&
 		    uf_info->container == UF_CONTAINER_EXTENTS &&
@@ -2384,6 +2386,7 @@
 			}
 		}
 		drop_exclusive_access(uf_info);
+		mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 	} else {
 		/*
 		   we are within reiser4 context already. How latter is
@@ -2678,9 +2681,11 @@
 			return PTR_ERR(ctx);
 
 		uf_info = unix_file_inode_data(dentry->d_inode);
+		mutex_lock(&reiser4_inode_data(dentry->d_inode)->mutex_write);
 		get_exclusive_access(uf_info);
 		result = setattr_truncate(dentry->d_inode, attr);
 		drop_exclusive_access(uf_info);
+		mutex_unlock(&reiser4_inode_data(dentry->d_inode)->mutex_write);
 		context_set_commit_async(ctx);
 		reiser4_exit_context(ctx);
 	} else
diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/super_ops.c linux-2.6.20.4/fs/reiser4/super_ops.c
--- linux-2.6.21-rc4-git7/fs/reiser4/super_ops.c	2007-03-23 14:40:53.818099768 +0300
+++ linux-2.6.20.4/fs/reiser4/super_ops.c	2007-04-06 04:02:40.000000000 +0400
@@ -44,6 +44,7 @@
 		 * etc. that will be added to our private inode part.
 		 */
 		INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
+		mutex_init(&info->p.mutex_write);
 		init_rwsem(&info->p.conv_sem);
 		/* init semaphore which is used during inode loading */
 		loading_init_once(&info->p);
--- original/fs/reiser4/plugin/file/cryptcompress.c	2007-04-06 08:00:28.000000000 +0400
+++ current/fs/reiser4/plugin/file/cryptcompress.c	2007-04-06 10:20:25.000000000 +0400
@@ -2783,7 +2783,7 @@
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
- 	mutex_lock(&inode->i_mutex);
+ 	mutex_lock(&reiser4_inode_data(inode)->mutex_write);
 
 	result = generic_write_checks(file, &pos, &count, 0);
   	if (unlikely(result != 0))
@@ -2803,7 +2803,7 @@
   	/* update position in a file */
   	*off = pos + result;
  out:
-	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
 
 	context_set_commit_async(ctx);
 	reiser4_exit_context(ctx);
--- original/fs/reiser4/plugin/item/extent_file_ops.c	2007-04-06 08:00:28.000000000 +0400
+++ current/fs/reiser4/plugin/item/extent_file_ops.c	2007-04-06 09:23:45.000000000 +0400
@@ -941,15 +941,15 @@
  * reiser4_write_extent - write method of extent item plugin
  * @file: file to write to
  * @buf: address of user-space buffer
- * @write_amount: number of bytes to write
- * @off: position in file to write to
+ * @count: number of bytes to write
+ * @pos: position in file to write to
  *
  */
 ssize_t reiser4_write_extent(struct file *file, const char __user *buf,
 			     size_t count, loff_t *pos)
 {
 	int have_to_update_extent;
-	int nr_pages;
+	int nr_pages, nr_dirty;
 	struct page *page;
 	jnode *jnodes[WRITE_GRANULARITY + 1];
 	struct inode *inode;
@@ -958,7 +958,7 @@
 	int i;
 	int to_page, page_off;
 	size_t left, written;
-	int result;
+	int result = 0;
 
 	inode = file->f_dentry->d_inode;
 	if (write_extent_reserve_space(inode))
@@ -972,10 +972,12 @@
 
 	BUG_ON(get_current_context()->trans->atom != NULL);
 
+	left = count;
 	index = *pos >> PAGE_CACHE_SHIFT;
 	/* calculate number of pages which are to be written */
       	end = ((*pos + count - 1) >> PAGE_CACHE_SHIFT);
 	nr_pages = end - index + 1;
+	nr_dirty = 0;
 	assert("", nr_pages <= WRITE_GRANULARITY + 1);
 
 	/* get pages and jnodes */
@@ -983,22 +985,18 @@
 		page = find_or_create_page(inode->i_mapping, index + i,
 					   reiser4_ctx_gfp_mask_get());
 		if (page == NULL) {
-			while(i --) {
-				unlock_page(jnode_page(jnodes[i]));
-				page_cache_release(jnode_page(jnodes[i]));
-			}
-			return RETERR(-ENOMEM);
+			nr_pages = i;
+			result = RETERR(-ENOMEM);
+			goto out;
 		}
 
 		jnodes[i] = jnode_of_page(page);
 		if (IS_ERR(jnodes[i])) {
 			unlock_page(page);
 			page_cache_release(page);
-			while (i --) {
-				jput(jnodes[i]);
-				page_cache_release(jnode_page(jnodes[i]));
-			}
-			return RETERR(-ENOMEM);
+			nr_pages = i;
+			result = RETERR(-ENOMEM);
+			goto out;
 		}
 		/* prevent jnode and page from disconnecting */
 		JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
@@ -1009,7 +1007,6 @@
 
 	have_to_update_extent = 0;
 
-	left = count;
 	page_off = (*pos & (PAGE_CACHE_SIZE - 1));
 	for (i = 0; i < nr_pages; i ++) {
 		to_page = PAGE_CACHE_SIZE - page_off;
@@ -1052,12 +1049,19 @@
 		}
 
 		written = filemap_copy_from_user(page, page_off, buf, to_page);
+		if (unlikely(written != to_page)) {
+			unlock_page(page);
+			result = RETERR(-EFAULT);
+			break;
+		}
+
 		flush_dcache_page(page);
 		reiser4_set_page_dirty_internal(page);
 		unlock_page(page);
+		nr_dirty++;
+
 		mark_page_accessed(page);
 		SetPageUptodate(page);
-		page_cache_release(page);
 
 		if (jnodes[i]->blocknr == 0)
 			have_to_update_extent ++;
@@ -1069,25 +1073,29 @@
 	}
 
 	if (have_to_update_extent) {
-		update_extents(file, jnodes, nr_pages, *pos);
+		update_extents(file, jnodes, nr_dirty, *pos);
 	} else {
-		for (i = 0; i < nr_pages; i ++) {
+		for (i = 0; i < nr_dirty; i ++) {
+			int ret;
 			spin_lock_jnode(jnodes[i]);
-			result = reiser4_try_capture(jnodes[i],
+			ret = reiser4_try_capture(jnodes[i],
 						     ZNODE_WRITE_LOCK, 0);
-			BUG_ON(result != 0);
+			BUG_ON(ret != 0);
 			jnode_make_dirty_locked(jnodes[i]);
 			spin_unlock_jnode(jnodes[i]);
 		}
 	}
-
+out:
 	for (i = 0; i < nr_pages; i ++) {
+		page_cache_release(jnode_page(jnodes[i]));
 		JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
 		jput(jnodes[i]);
 	}
 
-	/* the only error handled so far is EFAULT on copy_from_user  */
-	return (count - left) ? (count - left) : -EFAULT;
+	/* the only errors handled so far is ENOMEM and
+	   EFAULT on copy_from_user  */
+
+	return (count - left) ? (count - left) : result;
 }
 
 static inline void zero_page(struct page *page)

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux