Re: [2.6.14-rt1] slowdown / oops.

* Rusty Russell <[email protected]> wrote:

> > -	local_bh_disable();
> > +	read_lock_bh(&ip_conntrack_lock);

> This kind of change is troubling.  I suppose we could go to per-cpu 
> locks, but it's still a loss.

yeah, that's the right solution i think - with a small twist that should 
address your remaining concern: in the -rt tree there is already an API 
variant that gives us per-cpu locks on PREEMPT_RT but which maps to a 
plain per-cpu data structure on non-PREEMPT_RT builds. The idea is to 
pick a CPU once, and to stick to that choice during the critical 
section. This gives us nice per-CPU scalability, keeps the locks 
fine-grained so that there are no unnecessary interactions between 
critical sections - and keeps things fully preemptible at a small cost.
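
a minimal sketch of how such an API could look - the macro bodies 
below are illustrative assumptions for this mail, not the exact -rt 
implementation:

#ifdef CONFIG_PREEMPT_RT

/*
 * Sketch: every per-CPU instance is guarded by its own (on
 * PREEMPT_RT: sleeping) spinlock, so critical sections stay fully
 * preemptible. Correctness comes from the lock, not from preemption
 * being disabled - so it does not matter if we migrate away from
 * the CPU we picked.
 */
#define DECLARE_PER_CPU_LOCKED(type, name)				\
	DECLARE_PER_CPU(spinlock_t, name##_lock);			\
	DECLARE_PER_CPU(type, name)

#define DEFINE_PER_CPU_LOCKED(type, name)				\
	DEFINE_PER_CPU(spinlock_t, name##_lock) = SPIN_LOCK_UNLOCKED;	\
	DEFINE_PER_CPU(type, name)

/*
 * Pick a CPU once, lock its instance and stick to that choice. The
 * statement expression yields a pointer and the outer '*' makes the
 * result an lvalue, so '&get_cpu_var_locked(...)' works:
 */
#define get_cpu_var_locked(var, cpuptr)					\
(*({									\
	int __cpu = raw_smp_processor_id();				\
									\
	*(cpuptr) = __cpu;						\
	spin_lock(&per_cpu(var##_lock, __cpu));				\
	&per_cpu(var, __cpu);						\
}))

#define put_cpu_var_locked(var, cpu)					\
	spin_unlock(&per_cpu(var##_lock, (cpu)))

/* access a given CPU's instance with its lock already held: */
#define __get_cpu_var_locked(var, cpu)	per_cpu(var, (cpu))

#define EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(name)				\
	EXPORT_PER_CPU_SYMBOL_GPL(name##_lock);				\
	EXPORT_PER_CPU_SYMBOL_GPL(name)

#else /* !CONFIG_PREEMPT_RT */

/*
 * Sketch: on non-PREEMPT_RT this maps to the plain per-CPU
 * accessors - disabling preemption pins us to the chosen CPU, so
 * no lock is needed and there is no extra cost:
 */
#define DECLARE_PER_CPU_LOCKED(type, name)	DECLARE_PER_CPU(type, name)
#define DEFINE_PER_CPU_LOCKED(type, name)	DEFINE_PER_CPU(type, name)

#define get_cpu_var_locked(var, cpuptr)					\
(*({									\
	int __cpu = get_cpu();						\
									\
	*(cpuptr) = __cpu;						\
	&per_cpu(var, __cpu);						\
}))

#define put_cpu_var_locked(var, cpu)					\
	do { (void)(cpu); put_cpu(); } while (0)

#define __get_cpu_var_locked(var, cpu)	per_cpu(var, (cpu))

#define EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(name)				\
	EXPORT_PER_CPU_SYMBOL_GPL(name)

#endif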

the patch below shows the API usage - it's pretty straightforward, and 
it has solved the bug too. Conversion is easy: every place that needed 
changing was flagged at build time, and once converted, the first 
attempt booted and worked just fine.
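
distilled, the conversion pattern looks like this (using a made-up 
'my_stats' structure for illustration - not from the patch below):

/* hypothetical example, just to show the calling convention: */
struct my_stats {
	unsigned long events;
};

static DEFINE_PER_CPU_LOCKED(struct my_stats, my_stats);

static void my_stats_event(void)
{
	struct my_stats *stats;
	int cpu;

	/* pick a CPU once and stick to it for the critical section: */
	stats = &get_cpu_var_locked(my_stats, &cpu);
	stats->events++;
	put_cpu_var_locked(my_stats, cpu);
}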

I don't yet want to submit this approach upstream because right now 
only PREEMPT_RT needs it, but what do you think about the approach? It 
could be used by the upstream kernel too, to enable the use of PER_CPU 
data structures in cases where preemption cannot always be avoided.

	Ingo

Index: linux/include/linux/netfilter_ipv4/ip_conntrack.h
===================================================================
--- linux.orig/include/linux/netfilter_ipv4/ip_conntrack.h
+++ linux/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -460,10 +460,8 @@ struct ip_conntrack_ecache {
 	struct ip_conntrack *ct;
 	unsigned int events;
 };
-DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+DECLARE_PER_CPU_LOCKED(struct ip_conntrack_ecache, ip_conntrack_ecache);
 
-#define CONNTRACK_ECACHE(x)	(__get_cpu_var(ip_conntrack_ecache).x)
- 
 extern struct notifier_block *ip_conntrack_chain;
 extern struct notifier_block *ip_conntrack_expect_chain;
 
@@ -498,12 +496,14 @@ ip_conntrack_event_cache(enum ip_conntra
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
 	struct ip_conntrack_ecache *ecache;
-	
+	int cpu;
+
 	local_bh_disable();
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	ecache = &get_cpu_var_locked(ip_conntrack_ecache, &cpu);
 	if (ct != ecache->ct)
 		__ip_ct_event_cache_init(ct);
 	ecache->events |= event;
+	put_cpu_var_locked(ip_conntrack_ecache, cpu);
 	local_bh_enable();
 }
 
Index: linux/net/ipv4/netfilter/ip_conntrack_core.c
===================================================================
--- linux.orig/net/ipv4/netfilter/ip_conntrack_core.c
+++ linux/net/ipv4/netfilter/ip_conntrack_core.c
@@ -83,7 +83,7 @@ static unsigned int ip_conntrack_expect_
 struct notifier_block *ip_conntrack_chain;
 struct notifier_block *ip_conntrack_expect_chain;
 
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+DEFINE_PER_CPU_LOCKED(struct ip_conntrack_ecache, ip_conntrack_ecache);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
@@ -104,20 +104,23 @@ __ip_ct_deliver_cached_events(struct ip_
 void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
 {
 	struct ip_conntrack_ecache *ecache;
-	
+	int cpu;
+
 	local_bh_disable();
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	ecache = &get_cpu_var_locked(ip_conntrack_ecache, &cpu);
 	if (ecache->ct == ct)
 		__ip_ct_deliver_cached_events(ecache);
+	put_cpu_var_locked(ip_conntrack_ecache, cpu);
 	local_bh_enable();
 }
 
 void __ip_ct_event_cache_init(struct ip_conntrack *ct)
 {
 	struct ip_conntrack_ecache *ecache;
+	int cpu = raw_smp_processor_id();
 
 	/* take care of delivering potentially old events */
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	ecache = &__get_cpu_var_locked(ip_conntrack_ecache, cpu);
 	BUG_ON(ecache->ct == ct);
 	if (ecache->ct)
 		__ip_ct_deliver_cached_events(ecache);
@@ -133,8 +136,11 @@ static void ip_ct_event_cache_flush(void
 	struct ip_conntrack_ecache *ecache;
 	int cpu;
 
+	/*
+	 * First get all locks, then do the flush and drop the locks.
+	 */
 	for_each_cpu(cpu) {
-		ecache = &per_cpu(ip_conntrack_ecache, cpu);
+		ecache = &__get_cpu_var_locked(ip_conntrack_ecache, cpu);
 		if (ecache->ct)
 			ip_conntrack_put(ecache->ct);
 	}
Index: linux/net/ipv4/netfilter/ip_conntrack_standalone.c
===================================================================
--- linux.orig/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ linux/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -977,7 +977,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_expect_ch
 EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
 EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
 EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(ip_conntrack_ecache);
 #endif
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
