[Ubuntu PATCH] fix VFS nr_files accounting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



LKML discussion: http://thread.gmane.org/gmane.linux.kernel/385438/focus=385478

Is this already in the -mm tree?

From: Dipankar Sarma <[email protected]>

Ubuntu patch location:
http://www.kernel.org/git/?p=linux/kernel/git/bcollins/ubuntu-dapper.git;a=commitdiff;h=5ce2ed3a63172c6ce0b97069e449960c2d538623

---
 fs/dcache.c              |    2 -
 fs/file_table.c          |   86 +++++++++++++++++------------------------------
 include/linux/file.h     |    2 +
 include/linux/fs.h       |    1 
 include/linux/rcupdate.h |    6 ---
 kernel/rcupdate.c        |   49 +++++++++++++-------------
 kernel/sysctl.c          |    5 --
 net/unix/af_unix.c       |    2 -
 8 files changed, 62 insertions(+), 91 deletions(-)

--- linux-2617-g21.orig/fs/dcache.c
+++ linux-2617-g21/fs/dcache.c
@@ -1765,7 +1765,7 @@ void __init vfs_caches_init(unsigned lon
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 
 	dcache_init(mempages);
 	inode_init(mempages);
--- linux-2617-g21.orig/fs/file_table.c
+++ linux-2617-g21/fs/file_table.c
@@ -19,67 +19,52 @@
 #include <linux/capability.h>
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
-#include <linux/sysctl.h>
-#include <linux/percpu_counter.h>
-
-#include <asm/atomic.h>
 
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
-
-static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+EXPORT_SYMBOL(files_stat); /* Needed by unix.o */
 
-static inline void file_free_rcu(struct rcu_head *head)
-{
-	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
-	kmem_cache_free(filp_cachep, f);
-}
+/* public. Not pretty! */
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
-static inline void file_free(struct file *f)
-{
-	percpu_counter_dec(&nr_files);
-	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
-}
+static DEFINE_SPINLOCK(filp_count_lock);
 
-/*
- * Return the total number of open files in the system
+/* slab constructors and destructors are called from arbitrary
+ * context and must be fully threaded - use a local spinlock
+ * to protect files_stat.nr_files
  */
-static int get_nr_files(void)
+void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags)
 {
-	return percpu_counter_read_positive(&nr_files);
+	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		unsigned long flags;
+		spin_lock_irqsave(&filp_count_lock, flags);
+		files_stat.nr_files++;
+		spin_unlock_irqrestore(&filp_count_lock, flags);
+	}
 }
 
-/*
- * Return the maximum number of open files in the system
- */
-int get_max_files(void)
+void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags)
 {
-	return files_stat.max_files;
+	unsigned long flags;
+	spin_lock_irqsave(&filp_count_lock, flags);
+	files_stat.nr_files--;
+	spin_unlock_irqrestore(&filp_count_lock, flags);
 }
-EXPORT_SYMBOL_GPL(get_max_files);
 
-/*
- * Handle nr_files sysctl
- */
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
-                     void __user *buffer, size_t *lenp, loff_t *ppos)
+static inline void file_free_rcu(struct rcu_head *head)
 {
-	files_stat.nr_files = get_nr_files();
-	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
+	kmem_cache_free(filp_cachep, f);
 }
-#else
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
-                     void __user *buffer, size_t *lenp, loff_t *ppos)
+
+static inline void file_free(struct file *f)
 {
-	return -ENOSYS;
+	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
-#endif
 
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
@@ -94,20 +79,14 @@ struct file *get_empty_filp(void)
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
-		/*
-		 * percpu_counters are inaccurate.  Do an expensive check before
-		 * we go and fail.
-		 */
-		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
-			goto over;
-	}
+	if (files_stat.nr_files >= files_stat.max_files &&
+				!capable(CAP_SYS_ADMIN))
+		goto over;
 
 	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
 	if (f == NULL)
 		goto fail;
 
-	percpu_counter_inc(&nr_files);
 	memset(f, 0, sizeof(*f));
 	if (security_file_alloc(f))
 		goto fail_sec;
@@ -124,10 +103,10 @@ struct file *get_empty_filp(void)
 
 over:
 	/* Ran out of filps - report that */
-	if (get_nr_files() > old_max) {
+	if (files_stat.nr_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
-					get_max_files());
-		old_max = get_nr_files();
+					files_stat.max_files);
+		old_max = files_stat.nr_files;
 	}
 	goto fail;
 
@@ -299,5 +278,4 @@ void __init files_init(unsigned long mem
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
-	percpu_counter_init(&nr_files, 0);
 } 
--- linux-2617-g21.orig/include/linux/file.h
+++ linux-2617-g21/include/linux/file.h
@@ -80,6 +80,8 @@ extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
 struct kmem_cache;
+extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags);
+extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags);
 
 extern struct file ** alloc_fd_array(int);
 extern void free_fd_array(struct file **, int);
--- linux-2617-g21.orig/include/linux/fs.h
+++ linux-2617-g21/include/linux/fs.h
@@ -34,7 +34,6 @@ struct files_stat_struct {
 	int max_files;		/* tunable */
 };
 extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
--- linux-2617-g21.orig/include/linux/rcupdate.h
+++ linux-2617-g21/include/linux/rcupdate.h
@@ -98,17 +98,13 @@ struct rcu_data {
 	long  	       	batch;           /* Batch # for current RCU batch */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail;
-	long            qlen; 	 	 /* # of queued callbacks */
+	long            count; /* # of queued items */
 	struct rcu_head *curlist;
 	struct rcu_head **curtail;
 	struct rcu_head *donelist;
 	struct rcu_head **donetail;
-	long		blimit;		 /* Upper limit on a processed batch */
 	int cpu;
 	struct rcu_head barrier;
-#ifdef CONFIG_SMP
-	long		last_rs_qlen;	 /* qlen during the last resched */
-#endif
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
--- linux-2617-g21.orig/kernel/rcupdate.c
+++ linux-2617-g21/kernel/rcupdate.c
@@ -66,8 +66,14 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 
+static atomic_t rcu_barrier_cpu_count;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+static struct completion rcu_barrier_completion;
+
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
+static int maxbatch = 10000;
+#if 0
 static int blimit = 10;
 static int qhimark = 10000;
 static int qlowmark = 100;
@@ -75,10 +81,6 @@ static int qlowmark = 100;
 static int rsinterval = 1000;
 #endif
 
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
 #ifdef CONFIG_SMP
 static void force_quiescent_state(struct rcu_data *rdp,
 			struct rcu_ctrlblk *rcp)
@@ -105,6 +107,7 @@ static inline void force_quiescent_state
 	set_need_resched();
 }
 #endif
+#endif
 
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
@@ -129,13 +132,17 @@ void fastcall call_rcu(struct rcu_head *
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
-	if (unlikely(++rdp->qlen > qhimark)) {
-		rdp->blimit = INT_MAX;
-		force_quiescent_state(rdp, &rcu_ctrlblk);
-	}
+
+	if (unlikely(++rdp->count > 10000))
+		set_need_resched();
+
 	local_irq_restore(flags);
 }
 
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -164,12 +171,12 @@ void fastcall call_rcu_bh(struct rcu_hea
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
-
-	if (unlikely(++rdp->qlen > qhimark)) {
-		rdp->blimit = INT_MAX;
-		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
-	}
-
+	rdp->count++;
+/*
+ *  Should we directly call rcu_do_batch() here ?
+ *  if (unlikely(rdp->count > 10000))
+ *      rcu_do_batch(rdp);
+ */
 	local_irq_restore(flags);
 }
 
@@ -241,12 +248,10 @@ static void rcu_do_batch(struct rcu_data
 		next = rdp->donelist = list->next;
 		list->func(list);
 		list = next;
-		rdp->qlen--;
-		if (++count >= rdp->blimit)
+		rdp->count--;
+		if (++count >= maxbatch)
 			break;
 	}
-	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
-		rdp->blimit = blimit;
 	if (!rdp->donelist)
 		rdp->donetail = &rdp->donelist;
 	else
@@ -535,7 +540,6 @@ static void rcu_init_percpu_data(int cpu
 	rdp->quiescbatch = rcp->completed;
 	rdp->qs_pending = 0;
 	rdp->cpu = cpu;
-	rdp->blimit = blimit;
 }
 
 static void __devinit rcu_online_cpu(int cpu)
@@ -621,12 +625,7 @@ void synchronize_rcu(void)
 	wait_for_completion(&rcu.completion);
 }
 
-module_param(blimit, int, 0);
-module_param(qhimark, int, 0);
-module_param(qlowmark, int, 0);
-#ifdef CONFIG_SMP
-module_param(rsinterval, int, 0);
-#endif
+module_param(maxbatch, int, 0);
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 EXPORT_SYMBOL_GPL(call_rcu);
--- linux-2617-g21.orig/kernel/sysctl.c
+++ linux-2617-g21/kernel/sysctl.c
@@ -49,9 +49,6 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 
-extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
-                     void __user *buffer, size_t *lenp, loff_t *ppos);
-
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -971,7 +968,7 @@ static ctl_table fs_table[] = {
 		.data		= &files_stat,
 		.maxlen		= 3*sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= &proc_nr_files,
+		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.ctl_name	= FS_MAXFILE,
--- linux-2617-g21.orig/net/unix/af_unix.c
+++ linux-2617-g21/net/unix/af_unix.c
@@ -570,7 +570,7 @@ static struct sock * unix_create1(struct
 	struct sock *sk = NULL;
 	struct unix_sock *u;
 
-	if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
+	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
 		goto out;
 
 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux