[EXPERIMENT] Add new "flush" option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This adds new "flush" option for hotplug devices.

Current implementation of "flush" option does,

	- synchronizing data pages at ->release() (last close(2))
	- if user's work seems to be done (fs is not active), all
	  metadata syncs by pdflush()

This option would provide kind of sane progress, and dirty buffers is
flushed more frequently (if fs is not active).  This option doesn't
provide any robustness (robustness is provided by other options), but
probably the option is proper for hotplug devices.

(Please don't assume that dirty buffers is synchronized at any point.
This implementation will be changed easily.)
-- 
OGAWA Hirofumi <[email protected]>


Signed-off-by: OGAWA Hirofumi <[email protected]>
---

 fs/Makefile        |    2 -
 fs/flush.c         |   94 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fs-writeback.c  |    1 
 fs/namespace.c     |    1 
 fs/super.c         |    3 +
 include/linux/fs.h |   21 ++++++++++-
 6 files changed, 119 insertions(+), 3 deletions(-)

diff -puN fs/Makefile~add-flush_option fs/Makefile
--- linux-2.6/fs/Makefile~add-flush_option	2005-12-24 12:46:59.000000000 +0900
+++ linux-2.6-hirofumi/fs/Makefile	2005-12-24 12:46:59.000000000 +0900
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o
+		ioprio.o pnode.o flush.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
diff -puN /dev/null fs/flush.c
--- /dev/null	2005-12-25 04:55:31.716482250 +0900
+++ linux-2.6-hirofumi/fs/flush.c	2005-12-24 12:46:59.000000000 +0900
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2005, OGAWA Hirofumi
+ * Released under GPL v2.
+ */
+
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
+
+#define FLUSH_INITAL_DELAY	HZ
+#define FLUSH_DELAY		(HZ / 2)
+
+int fs_flush_sync_fdata(struct inode *inode, struct file *filp)
+{
+	int err = 0;
+
+	if (IS_FLUSH(inode) && filp->f_mode & FMODE_WRITE) {
+		current->flags |= PF_SYNCWRITE;
+		err = filemap_write_and_wait(inode->i_mapping);
+		current->flags &= ~PF_SYNCWRITE;
+	}
+	return err;
+}
+EXPORT_SYMBOL(fs_flush_sync_fdata);
+
+static void fs_flush_pdflush_handler(unsigned long arg)
+{
+	struct super_block *sb = (struct super_block *)arg;
+	fsync_super(sb);
+	up_read(&sb->s_umount);
+}
+
+static void fs_flush_timer(unsigned long data)
+{
+	struct super_block *sb = (struct super_block *)data;
+	struct backing_dev_info *bdi = blk_get_backing_dev_info(sb->s_bdev);
+	unsigned long last_flush_jiff;
+	int err;
+
+	if (bdi_write_congested(bdi)) {
+		mod_timer(&sb->flush_timer, jiffies + (HZ / 10));
+		return;
+	}
+
+	last_flush_jiff = sb->last_flush_jiff;
+
+	if (!time_after_eq(jiffies, last_flush_jiff + FLUSH_DELAY)) {
+		mod_timer(&sb->flush_timer, last_flush_jiff + FLUSH_DELAY);
+		return;
+	}
+
+	if (down_read_trylock(&sb->s_umount)) {
+		if (sb->s_root) {
+			err = pdflush_operation(fs_flush_pdflush_handler, data);
+			if (!err)
+				return;
+			mod_timer(&sb->flush_timer, jiffies + FLUSH_DELAY);
+		}
+		up_read(&sb->s_umount);
+	}
+}
+
+void __fs_mark_flush(struct super_block *sb)
+{
+	sb->last_flush_jiff = jiffies;
+	/*
+	 * make sure by smb_wmb() that dirty buffers before here is
+	 * processed at the timer routine.
+	 */
+	smp_wmb();
+
+	if (!timer_pending(&sb->flush_timer))
+		mod_timer(&sb->flush_timer, jiffies + FLUSH_INITAL_DELAY);
+}
+EXPORT_SYMBOL(__fs_mark_flush);
+
+/*
+ * caller must take down_write(sb->s_umount), otherwise pdflush
+ * handler may be run after this del_timer_sync.
+ */
+void fs_flush_stop(struct super_block *sb)
+{
+	sb->s_flags &= ~MS_FLUSH;
+	del_timer_sync(&sb->flush_timer);
+}
+
+void fs_flush_init(struct super_block *sb)
+{
+	init_timer(&sb->flush_timer);
+	sb->flush_timer.data = (unsigned long)sb;
+	sb->flush_timer.function = fs_flush_timer;
+	sb->last_flush_jiff = 0;
+}
diff -puN fs/fs-writeback.c~add-flush_option fs/fs-writeback.c
--- linux-2.6/fs/fs-writeback.c~add-flush_option	2005-12-24 12:46:59.000000000 +0900
+++ linux-2.6-hirofumi/fs/fs-writeback.c	2005-12-24 12:46:59.000000000 +0900
@@ -63,6 +63,7 @@ void __mark_inode_dirty(struct inode *in
 	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		if (sb->s_op->dirty_inode)
 			sb->s_op->dirty_inode(inode);
+		fs_mark_flush(sb);
 	}
 
 	/*
diff -puN fs/namespace.c~add-flush_option fs/namespace.c
--- linux-2.6/fs/namespace.c~add-flush_option	2005-12-24 12:46:59.000000000 +0900
+++ linux-2.6-hirofumi/fs/namespace.c	2005-12-24 12:46:59.000000000 +0900
@@ -354,6 +354,7 @@ static int show_vfsmnt(struct seq_file *
 	} fs_info[] = {
 		{ MS_SYNCHRONOUS, ",sync" },
 		{ MS_DIRSYNC, ",dirsync" },
+		{ MS_FLUSH, ",flush" },
 		{ MS_MANDLOCK, ",mand" },
 		{ MS_NOATIME, ",noatime" },
 		{ MS_NODIRATIME, ",nodiratime" },
diff -puN fs/super.c~add-flush_option fs/super.c
--- linux-2.6/fs/super.c~add-flush_option	2005-12-24 12:46:59.000000000 +0900
+++ linux-2.6-hirofumi/fs/super.c	2005-12-24 12:46:59.000000000 +0900
@@ -86,6 +86,7 @@ static struct super_block *alloc_super(v
 		s->s_qcop = sb_quotactl_ops;
 		s->s_op = &default_op;
 		s->s_time_gran = 1000000000;
+		fs_flush_init(s);
 	}
 out:
 	return s;
@@ -230,6 +231,7 @@ void generic_shutdown_super(struct super
 
 	if (root) {
 		sb->s_root = NULL;
+		fs_flush_stop(sb);
 		shrink_dcache_parent(root);
 		shrink_dcache_anon(&sb->s_anon);
 		dput(root);
@@ -536,6 +538,7 @@ int do_remount_sb(struct super_block *sb
 		return -EACCES;
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
+	fs_flush_stop(sb);
 	shrink_dcache_sb(sb);
 	fsync_super(sb);
 
diff -puN include/linux/fs.h~add-flush_option include/linux/fs.h
--- linux-2.6/include/linux/fs.h~add-flush_option	2005-12-24 12:46:59.000000000 +0900
+++ linux-2.6-hirofumi/include/linux/fs.h	2005-12-24 12:46:59.000000000 +0900
@@ -98,6 +98,7 @@ extern int dir_notify_enable;
 #define MS_REMOUNT	32	/* Alter flags of a mounted FS */
 #define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
 #define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_FLUSH	256	/* Frequently flushes for hotplug devices */
 #define MS_NOATIME	1024	/* Do not update access times. */
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
@@ -115,8 +116,8 @@ extern int dir_notify_enable;
 /*
  * Superblock flags that can be altered by MS_REMOUNT
  */
-#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|\
-			 MS_NODIRATIME)
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_FLUSH| \
+			 MS_NOATIME|MS_NODIRATIME)
 
 /*
  * Old magic mount flag and mask
@@ -157,6 +158,7 @@ extern int dir_notify_enable;
 					((inode)->i_flags & S_SYNC))
 #define IS_DIRSYNC(inode)	(__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
 					((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
+#define IS_FLUSH(inode)		__IS_FLG(inode, MS_FLUSH)
 #define IS_MANDLOCK(inode)	__IS_FLG(inode, MS_MANDLOCK)
 
 #define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
@@ -222,6 +224,7 @@ extern int dir_notify_enable;
 #include <linux/prio_tree.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -826,6 +829,9 @@ struct super_block {
 	/* Granuality of c/m/atime in ns.
 	   Cannot be worse than a second */
 	u32		   s_time_gran;
+
+	struct timer_list flush_timer;
+	unsigned long last_flush_jiff;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
@@ -1099,6 +1105,17 @@ static inline void file_accessed(struct 
 		touch_atime(file->f_vfsmnt, file->f_dentry);
 }
 
+int fs_flush_sync_fdata(struct inode *inode, struct file *filp);
+void __fs_mark_flush(struct super_block *sb);
+void fs_flush_stop(struct super_block *sb);
+void fs_flush_init(struct super_block *sb);
+
+static inline void fs_mark_flush(struct super_block *sb)
+{
+	if (sb->s_flags & MS_FLUSH)
+		__fs_mark_flush(sb);
+}
+
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
 
 /**
_




Signed-off-by: OGAWA Hirofumi <[email protected]>
---

 fs/fat/file.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff -puN fs/fat/file.c~fat_flush-support fs/fat/file.c
--- linux-2.6/fs/fat/file.c~fat_flush-support	2005-12-25 04:58:16.000000000 +0900
+++ linux-2.6-hirofumi/fs/fat/file.c	2005-12-25 04:58:16.000000000 +0900
@@ -119,6 +119,7 @@ struct file_operations fat_file_operatio
 	.writev		= generic_file_writev,
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= generic_file_aio_write,
+	.release	= fs_flush_sync_fdata,
 	.mmap		= generic_file_mmap,
 	.ioctl		= fat_generic_ioctl,
 	.fsync		= file_fsync,
@@ -272,7 +273,8 @@ static int fat_free(struct inode *inode,
 
 void fat_truncate(struct inode *inode)
 {
-	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+	struct super_block *sb = inode->i_sb;
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	const unsigned int cluster_size = sbi->cluster_size;
 	int nr_clusters;
 
@@ -288,6 +290,7 @@ void fat_truncate(struct inode *inode)
 	lock_kernel();
 	fat_free(inode, nr_clusters);
 	unlock_kernel();
+	fs_mark_flush(sb);
 }
 
 struct inode_operations fat_file_inode_operations = {
_




Signed-off-by: OGAWA Hirofumi <[email protected]>
---

 fs/fat/inode.c |    8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff -puN fs/fat/inode.c~fat_flush-debug fs/fat/inode.c
--- linux-2.6/fs/fat/inode.c~fat_flush-debug	2005-12-25 04:58:19.000000000 +0900
+++ linux-2.6-hirofumi/fs/fat/inode.c	2005-12-25 04:58:19.000000000 +0900
@@ -855,7 +855,7 @@ static int fat_show_options(struct seq_f
 enum {
 	Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
 	Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_nocase,
-	Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable,
+	Opt_flush, Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable,
 	Opt_dots, Opt_nodots,
 	Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
 	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
@@ -877,6 +877,7 @@ static match_table_t fat_tokens = {
 	{Opt_fmask, "fmask=%o"},
 	{Opt_codepage, "codepage=%u"},
 	{Opt_nocase, "nocase"},
+	{Opt_flush, "flush"},
 	{Opt_quiet, "quiet"},
 	{Opt_showexec, "showexec"},
 	{Opt_debug, "debug"},
@@ -991,6 +992,9 @@ static int parse_options(char *options, 
 					| VFAT_SFN_CREATE_WIN95;
 			}
 			break;
+		case Opt_flush:
+			*debug = 1;
+			break;
 		case Opt_quiet:
 			opts->quiet = 1;
 			break;
@@ -1182,6 +1186,8 @@ int fat_fill_super(struct super_block *s
 	error = parse_options(data, isvfat, silent, &debug, &sbi->options);
 	if (error)
 		goto out_fail;
+	if (debug)
+		sb->s_flags |= MS_FLUSH;
 
 	error = -EIO;
 	sb_min_blocksize(sb, 512);
_
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux