[PATCH] struct file cleanup : the very large file_ra_state is now allocated only on demand.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Andi Kleen a écrit :


(because of the insane struct file_ra_state f_ra. I wish this structure were dynamically allocated only for files that really use it)


How about you submit a patch for that instead?
-Andi

OK, could you please comment this patch ?

The problem of dynamically allocating the readahead state data is that the allocation can fail and should not be fatal.
I made some choices that might be not good.

I also chose not to align "file_ra" slab on SLAB_HWCACHE_ALIGN because the object size is 10*sizeof(long), so alignment would loose 6*sizeof(long) bytes for each object.


[PATCH]

* struct file cleanup : the very large file_ra_state is now allocated only on demand, using a dedicated "file_ra" slab.
	64bits machines handling lot of sockets can save about 72 bytes per file.
* private_data : The field is moved close to f_count and f_op fields to speedup sockfd_lookups

Thank you

Signed-off-by: Eric Dumazet <[email protected]>



--- linux-2.6.13-rc6/include/linux/fs.h	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/fs.h	2005-08-18 01:33:04.000000000 +0200
@@ -586,20 +586,19 @@
 	struct dentry		*f_dentry;
 	struct vfsmount         *f_vfsmnt;
 	struct file_operations	*f_op;
+	void			*private_data;
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
-	struct file_ra_state	f_ra;
+	struct file_ra_state	*f_rap;
 
 	size_t			f_maxcount;
 	unsigned long		f_version;
 	void			*f_security;
 
-	/* needed for tty driver, and maybe others */
-	void			*private_data;
 
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
@@ -1514,8 +1513,7 @@
 extern void do_generic_mapping_read(struct address_space *mapping,
 				    struct file_ra_state *, struct file *,
 				    loff_t *, read_descriptor_t *, read_actor_t);
-extern void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+extern struct file_ra_state *file_ra_state_init(struct file *);
 extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs);
 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 
@@ -1549,8 +1547,10 @@
 					read_descriptor_t * desc,
 					read_actor_t actor)
 {
+	if (filp->f_rap == NULL)
+		file_ra_state_init(filp);
 	do_generic_mapping_read(filp->f_mapping,
-				&filp->f_ra,
+				filp->f_rap,
 				filp,
 				ppos,
 				desc,
--- linux-2.6.13-rc6/include/linux/slab.h	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/slab.h	2005-08-18 00:37:59.000000000 +0200
@@ -125,6 +125,7 @@
 extern kmem_cache_t	*names_cachep;
 extern kmem_cache_t	*files_cachep;
 extern kmem_cache_t	*filp_cachep;
+extern kmem_cache_t	*ra_cachep;
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*signal_cachep;
 extern kmem_cache_t	*sighand_cachep;
--- linux-2.6.13-rc6/mm/readahead.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/readahead.c	2005-08-18 01:14:11.000000000 +0200
@@ -29,14 +29,20 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 /*
- * Initialise a struct file's readahead state.  Assumes that the caller has
- * memset *ra to zero.
+ * Initialise a struct file's readahead state.
  */
-void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
+struct file_ra_state *
+file_ra_state_init(struct file *file)
 {
-	ra->ra_pages = mapping->backing_dev_info->ra_pages;
-	ra->prev_page = -1;
+	struct file_ra_state *ra = kmem_cache_alloc(ra_cachep, GFP_KERNEL);
+
+	if (ra != NULL) {
+		file->f_rap = ra;
+		memset(ra, 0, sizeof(*ra));
+		ra->ra_pages = file->f_mapping->host->i_mapping->backing_dev_info->ra_pages;
+		ra->prev_page = -1;
+	}
+	return ra;
 }
 
 /*
--- linux-2.6.13-rc6/mm/filemap.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/filemap.c	2005-08-18 01:33:58.000000000 +0200
@@ -711,7 +711,7 @@
  * NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
-			     struct file_ra_state *_ra,
+			     struct file_ra_state *_rap,
 			     struct file *filp,
 			     loff_t *ppos,
 			     read_descriptor_t *desc,
@@ -727,8 +727,15 @@
 	loff_t isize;
 	struct page *cached_page;
 	int error;
-	struct file_ra_state ra = *_ra;
+	struct file_ra_state ra;
 
+	if (likely(_rap != NULL))
+		ra = *_rap;
+	else {
+		memset(&ra, 0, sizeof(ra));
+		ra.prev_page = -1;
+		ra.ra_pages = default_backing_dev_info.ra_pages;
+	}
 	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	next_index = index;
@@ -908,7 +915,8 @@
 	}
 
 out:
-	*_ra = ra;
+	if (_rap != NULL)
+		*_rap = ra;
 
 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 	if (cached_page)
@@ -1187,12 +1195,15 @@
 	int error;
 	struct file *file = area->vm_file;
 	struct address_space *mapping = file->f_mapping;
-	struct file_ra_state *ra = &file->f_ra;
+	struct file_ra_state *ra = file->f_rap;
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long size, pgoff;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+	if (ra == NULL)
+		ra = file_ra_state_init(file);
+
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1225,14 +1236,16 @@
 			handle_ra_miss(mapping, ra, pgoff);
 			goto no_cached_page;
 		}
-		ra->mmap_miss++;
+		if (ra != NULL) {
+			ra->mmap_miss++;
 
-		/*
-		 * Do we miss much more than hit in this file? If so,
-		 * stop bothering with read-ahead. It will only hurt.
-		 */
-		if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
-			goto no_cached_page;
+			/*
+			 * Do we miss much more than hit in this file? If so,
+			 * stop bothering with read-ahead. It will only hurt.
+			 */
+			if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+				goto no_cached_page;
+		}
 
 		/*
 		 * To keep the pgmajfault counter straight, we need to
@@ -1243,7 +1256,7 @@
 			inc_page_state(pgmajfault);
 		}
 		did_readaround = 1;
-		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
+		ra_pages = max_sane_readahead(ra != NULL ? ra->ra_pages : default_backing_dev_info.ra_pages);
 		if (ra_pages) {
 			pgoff_t start = 0;
 
@@ -1256,7 +1269,7 @@
 			goto no_cached_page;
 	}
 
-	if (!did_readaround)
+	if (!did_readaround && ra != NULL)
 		ra->mmap_hit++;
 
 	/*
--- linux-2.6.13-rc6/mm/fadvise.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/fadvise.c	2005-08-18 01:05:15.000000000 +0200
@@ -28,6 +28,7 @@
 	struct file *file = fget(fd);
 	struct address_space *mapping;
 	struct backing_dev_info *bdi;
+	struct file_ra_state *ra;
 	loff_t endbyte;
 	pgoff_t start_index;
 	pgoff_t end_index;
@@ -54,15 +55,20 @@
 
 	bdi = mapping->backing_dev_info;
 
+	if ((ra = file->f_rap) == NULL)
+		ra = file_ra_state_init(file);
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
-		file->f_ra.ra_pages = bdi->ra_pages;
+		if (ra != NULL)
+			ra->ra_pages = bdi->ra_pages;
 		break;
 	case POSIX_FADV_RANDOM:
-		file->f_ra.ra_pages = 0;
+		if (ra != NULL)
+			ra->ra_pages = 0;
 		break;
 	case POSIX_FADV_SEQUENTIAL:
-		file->f_ra.ra_pages = bdi->ra_pages * 2;
+		if (ra != NULL)
+			ra->ra_pages = bdi->ra_pages * 2;
 		break;
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_NOREUSE:
--- linux-2.6.13-rc6/fs/open.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/open.c	2005-08-18 01:05:15.000000000 +0200
@@ -804,7 +804,7 @@
 	}
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
-	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+	f->f_rap = NULL;
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
--- linux-2.6.13-rc6/fs/file_table.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/file_table.c	2005-08-18 00:37:59.000000000 +0200
@@ -55,6 +55,8 @@
 
 static inline void file_free(struct file *f)
 {
+	if (f->f_rap != NULL)
+		kmem_cache_free(ra_cachep, f->f_rap);
 	kmem_cache_free(filp_cachep, f);
 }
 
--- linux-2.6.13-rc6/fs/dcache.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/dcache.c	2005-08-18 02:22:56.000000000 +0200
@@ -1705,6 +1705,8 @@
 
 /* SLAB cache for file structures */
 kmem_cache_t *filp_cachep;
+/* SLAB cache for ra structures */
+kmem_cache_t *ra_cachep;
 
 EXPORT_SYMBOL(d_genocide);
 
@@ -1733,6 +1735,9 @@
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 
+	ra_cachep = kmem_cache_create("file_ra", sizeof(struct file_ra_state), 0,
+			SLAB_PANIC, NULL, NULL);
+
 	dcache_init(mempages);
 	inode_init(mempages);
 	files_init(mempages);

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux