* Rusty Russell <[email protected]> wrote:
> > - local_bh_disable();
> > + read_lock_bh(&ip_conntrack_lock);
> This kind of change is troubling. I suppose we could go to per-cpu
> locks, but it's still a loss.

Yeah, that's the right solution I think - with a small twist that should
address your remaining concern: the -rt tree already has an API variant
that gives us per-CPU locks on PREEMPT_RT but maps to a plain per-CPU
data structure on non-PREEMPT_RT builds. The idea is to pick a CPU once
and stick to that choice for the duration of the critical section. This
gives us nice per-CPU scalability and keeps the locks fine-grained, so
there are no unnecessary interactions between critical sections - and it
keeps things fully preemptible, at a small cost.
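
For illustration, here is a minimal sketch of what the PREEMPT_RT side
of such an API could look like. The names match the patch below, but the
exact -rt implementation may differ - pairing each per-CPU instance with
its own per-CPU spinlock is an assumption of this sketch:

/*
 * Sketch only: each per-CPU instance is paired with a per-CPU spinlock
 * (a sleeping lock on PREEMPT_RT, so the critical section stays
 * preemptible). get_cpu_var_locked() picks the current CPU, records it
 * in *cpuptr, locks that CPU's instance and returns it - the task may
 * later be preempted or even migrate, but it keeps using (and finally
 * unlocks) the CPU it picked.
 */
#define DEFINE_PER_CPU_LOCKED(type, name)				\
	DEFINE_PER_CPU(spinlock_t, __lock_##name) = SPIN_LOCK_UNLOCKED;\
	DEFINE_PER_CPU(type, name##_locked)

#define DECLARE_PER_CPU_LOCKED(type, name)				\
	DECLARE_PER_CPU(spinlock_t, __lock_##name);			\
	DECLARE_PER_CPU(type, name##_locked)

#define get_cpu_var_locked(var, cpuptr)					\
(*({									\
	int __cpu = raw_smp_processor_id();				\
									\
	*(cpuptr) = __cpu;						\
	spin_lock(&per_cpu(__lock_##var, __cpu));			\
	&per_cpu(var##_locked, __cpu);					\
}))

#define put_cpu_var_locked(var, cpu)					\
	spin_unlock(&per_cpu(__lock_##var, (cpu)))

/* access for code that already holds the lock of 'cpu': */
#define __get_cpu_var_locked(var, cpu)					\
	per_cpu(var##_locked, (cpu))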

The patch below shows the API usage - it's pretty straightforward, and
it has solved the bug too. Conversion is easy: every place that needed
changing was caught at build time, and once converted, the first attempt
booted and worked just fine.

I don't want to submit this approach upstream yet, because right now
only PREEMPT_RT needs it - but what do you think about the approach? It
could be used by the upstream kernel too, to enable the use of PER_CPU
data structures in cases where preemption cannot always be avoided.
Ingo
Index: linux/include/linux/netfilter_ipv4/ip_conntrack.h
===================================================================
--- linux.orig/include/linux/netfilter_ipv4/ip_conntrack.h
+++ linux/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -460,10 +460,8 @@ struct ip_conntrack_ecache {
struct ip_conntrack *ct;
unsigned int events;
};
-DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+DECLARE_PER_CPU_LOCKED(struct ip_conntrack_ecache, ip_conntrack_ecache);
-#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x)
-
extern struct notifier_block *ip_conntrack_chain;
extern struct notifier_block *ip_conntrack_expect_chain;
@@ -498,12 +496,14 @@ ip_conntrack_event_cache(enum ip_conntra
{
struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
struct ip_conntrack_ecache *ecache;
-
+ int cpu;
+
local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
+ ecache = &get_cpu_var_locked(ip_conntrack_ecache, &cpu);
if (ct != ecache->ct)
__ip_ct_event_cache_init(ct);
ecache->events |= event;
+ put_cpu_var_locked(ip_conntrack_ecache, cpu);
local_bh_enable();
}
Index: linux/net/ipv4/netfilter/ip_conntrack_core.c
===================================================================
--- linux.orig/net/ipv4/netfilter/ip_conntrack_core.c
+++ linux/net/ipv4/netfilter/ip_conntrack_core.c
@@ -83,7 +83,7 @@ static unsigned int ip_conntrack_expect_
struct notifier_block *ip_conntrack_chain;
struct notifier_block *ip_conntrack_expect_chain;
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+DEFINE_PER_CPU_LOCKED(struct ip_conntrack_ecache, ip_conntrack_ecache);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
@@ -104,20 +104,23 @@ __ip_ct_deliver_cached_events(struct ip_
void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
{
struct ip_conntrack_ecache *ecache;
-
+ int cpu;
+
local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
+ ecache = &get_cpu_var_locked(ip_conntrack_ecache, &cpu);
if (ecache->ct == ct)
__ip_ct_deliver_cached_events(ecache);
+ put_cpu_var_locked(ip_conntrack_ecache, cpu);
local_bh_enable();
}
void __ip_ct_event_cache_init(struct ip_conntrack *ct)
{
struct ip_conntrack_ecache *ecache;
+ int cpu = raw_smp_processor_id();
/* take care of delivering potentially old events */
- ecache = &__get_cpu_var(ip_conntrack_ecache);
+ ecache = &__get_cpu_var_locked(ip_conntrack_ecache, cpu);
BUG_ON(ecache->ct == ct);
if (ecache->ct)
__ip_ct_deliver_cached_events(ecache);
@@ -133,8 +136,11 @@ static void ip_ct_event_cache_flush(void
struct ip_conntrack_ecache *ecache;
int cpu;
+ /*
+ * First get all locks, then do the flush and drop the locks.
+ */
for_each_cpu(cpu) {
- ecache = &per_cpu(ip_conntrack_ecache, cpu);
+ ecache = &__get_cpu_var_locked(ip_conntrack_ecache, cpu);
if (ecache->ct)
ip_conntrack_put(ecache->ct);
}
Index: linux/net/ipv4/netfilter/ip_conntrack_standalone.c
===================================================================
--- linux.orig/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ linux/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -977,7 +977,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_expect_ch
EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(ip_conntrack_ecache);
#endif
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(ip_conntrack_protocol_unregister);