Re: 2.6.14-rt22 (and mainline): netstat -anop triggers excessive latencies

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 27 Dec 2005, Lee Revell wrote:

> On Tue, 2005-12-27 at 21:11 -0500, Steven Rostedt wrote:
> > On Tue, 27 Dec 2005, Lee Revell wrote:
> >
> > > > [snip]
> > > >
> > > > So it really does improve the latency here.  Now is this worth the
> > > > overhead?  This might be useful in other places too.
> > >
> > > Any chance you can regenerate the patch against 2.6.15-rc5-rt4?
> > >
> >
> > Sure, if I can find the damn thing.  Too many kernels, and too many patch
> > directories.
>
> Never mind, I applied it by hand.  I'll let you know how it works.
>

OK, I did find it though, and it only had one rej. So you probably can
easily do that change yourself.

Aw heck, here it is anyway. (look everybody, a patch pulled in with
pine!).  Compliments of quilt.


-- Steve

Index: linux-2.6.15-rc5-rt4/include/net/inet_hashtables.h
===================================================================
--- linux-2.6.15-rc5-rt4.orig/include/net/inet_hashtables.h	2005-12-14 14:37:00.000000000 -0500
+++ linux-2.6.15-rc5-rt4/include/net/inet_hashtables.h	2005-12-27 21:12:57.000000000 -0500
@@ -101,6 +101,7 @@
 	 * is for TIME_WAIT sockets only.
 	 */
 	struct inet_ehash_bucket	*ehash;
+	unsigned long			*ebitmask;

 	/* Ok, let's try this, I give up, we do need a local binding
 	 * TCP hash as well as the others for fast bind/connect.
@@ -155,6 +156,13 @@
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }

+static inline unsigned int inet_ehash_index(
+	struct inet_hashinfo *hashinfo,
+	unsigned int hash)
+{
+	return hash & (hashinfo->ehash_size - 1);
+}
+
 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(kmem_cache_t *cachep,
 					    struct inet_bind_hashbucket *head,
@@ -227,11 +235,25 @@
 		wake_up(&hashinfo->lhash_wait);
 }

+static inline void __inet_hash_setbit(unsigned long *bitmask, unsigned int index)
+{
+	if (bitmask)
+		set_bit(index, bitmask);
+}
+
+static inline void __inet_hash_clearbit(unsigned long *bitmask, unsigned int index)
+{
+	if (bitmask)
+		clear_bit(index, bitmask);
+}
+
 static inline void __inet_hash(struct inet_hashinfo *hashinfo,
 			       struct sock *sk, const int listen_possible)
 {
 	struct hlist_head *list;
 	rwlock_t *lock;
+	unsigned long *bitmask = NULL;
+	unsigned int index = 0;

 	BUG_TRAP(sk_unhashed(sk));
 	if (listen_possible && sk->sk_state == TCP_LISTEN) {
@@ -241,12 +263,15 @@
 	} else {
 		struct inet_ehash_bucket *head;
 		sk->sk_hash = inet_sk_ehashfn(sk);
+		index = inet_ehash_index(hashinfo, sk->sk_hash);
 		head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 		list = &head->chain;
 		lock = &head->lock;
+		bitmask = hashinfo->ebitmask;
 		write_lock(lock);
 	}
 	__sk_add_node(sk, list);
+	__inet_hash_setbit(bitmask, index);
 	sock_prot_inc_use(sk->sk_prot);
 	write_unlock(lock);
 	if (listen_possible && sk->sk_state == TCP_LISTEN)
@@ -265,6 +290,8 @@
 static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
 	rwlock_t *lock;
+	unsigned long *bitmask = NULL;
+	unsigned int index = 0;

 	if (sk_unhashed(sk))
 		goto out;
@@ -274,12 +301,30 @@
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
 	} else {
+		index = inet_ehash_index(hashinfo, sk->sk_hash);
 		lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
+		bitmask = hashinfo->ebitmask;
 		write_lock_bh(lock);
 	}

-	if (__sk_del_node_init(sk))
+	if (__sk_del_node_init(sk)) {
+		/*
+		 * Several sockets can share one bucket, so the bit may only
+		 * be cleared once nothing is left in it.  The bucket
+		 * ehash_size slots further on is checked too -- presumably
+		 * the TIME_WAIT half of the table; verify against
+		 * inet_hashinfo.
+		 */
+		if (bitmask) {
+			struct inet_ehash_bucket *head =
+				inet_ehash_bucket(hashinfo, sk->sk_hash);
+
+			if (hlist_empty(&head->chain) &&
+			    hlist_empty(&(head + hashinfo->ehash_size)->chain))
+				__inet_hash_clearbit(bitmask, index);
+		}
 		sock_prot_dec_use(sk->sk_prot);
+	}
 	write_unlock_bh(lock);
 out:
 	if (sk->sk_state == TCP_LISTEN)
Index: linux-2.6.15-rc5-rt4/mm/page_alloc.c
===================================================================
--- linux-2.6.15-rc5-rt4.orig/mm/page_alloc.c	2005-12-20 18:18:14.000000000 -0500
+++ linux-2.6.15-rc5-rt4/mm/page_alloc.c	2005-12-27 21:12:57.000000000 -0500
@@ -2664,3 +2664,42 @@

 	return table;
 }
+
+/*
+ * Allocate a zeroed bitmask of at least @bits bits for a large system
+ * table.  @bitmaskname is only used in the panic message; @flags selects
+ * the allocator (HASH_EARLY uses bootmem).  Panics on failure, so the
+ * return value is never NULL.
+ */
+void *__init alloc_large_system_bitmask(char *bitmaskname,
+					unsigned long bits, int flags)
+{
+	const unsigned long bits_per_word = sizeof(unsigned long) * 8;
+	/* Round up so a bit count that is not a whole number of words fits. */
+	unsigned long words = (bits + bits_per_word - 1) / bits_per_word;
+	unsigned long size = words * sizeof(unsigned long);
+	unsigned long *bitmask = NULL;
+
+	if (flags & HASH_EARLY)
+		bitmask = alloc_bootmem(size);
+	else if (hashdist)
+		bitmask = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
+	else {
+		bitmask = kmalloc(size, GFP_ATOMIC);
+		if (!bitmask) {
+			unsigned long order;
+			for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++)
+				;
+			bitmask = (void*) __get_free_pages(GFP_ATOMIC, order);
+		}
+	}
+
+	if (!bitmask)
+		panic("Failed to allocate %s bitmask\n", bitmaskname);
+
+	/* Not every allocator above returns cleared memory; start with no
+	 * bits set so only hashed buckets are ever marked. */
+	memset(bitmask, 0, size);
+
+	return bitmask;
+}
Index: linux-2.6.15-rc5-rt4/net/ipv4/tcp.c
===================================================================
--- linux-2.6.15-rc5-rt4.orig/net/ipv4/tcp.c	2005-12-14 14:37:00.000000000 -0500
+++ linux-2.6.15-rc5-rt4/net/ipv4/tcp.c	2005-12-27 21:14:16.000000000 -0500
@@ -2039,6 +2039,8 @@
 }
 __setup("thash_entries=", set_thash_entries);

+void *__init alloc_large_system_bitmask(char *bitmaskname,
+					unsigned long bits, int flags);
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
@@ -2071,6 +2073,10 @@
 					NULL,
 					0);
 	tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1;
+	tcp_hashinfo.ebitmask =
+		alloc_large_system_bitmask("TCP established",
+					  tcp_hashinfo.ehash_size,
+					  HASH_HIGHMEM);
 	for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) {
 		rwlock_init(&tcp_hashinfo.ehash[i].lock);
 		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
Index: linux-2.6.15-rc5-rt4/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.15-rc5-rt4.orig/net/ipv4/tcp_ipv4.c	2005-12-20 18:18:14.000000000 -0500
+++ linux-2.6.15-rc5-rt4/net/ipv4/tcp_ipv4.c	2005-12-27 21:12:57.000000000 -0500
@@ -1581,7 +1581,12 @@
 	struct tcp_iter_state* st = seq->private;
 	void *rc = NULL;

-	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
+	for (st->bucket = find_first_bit(tcp_hashinfo.ebitmask,
+					 tcp_hashinfo.ehash_size);
+	     st->bucket < tcp_hashinfo.ehash_size;
+	     st->bucket = find_next_bit(tcp_hashinfo.ebitmask,
+					tcp_hashinfo.ehash_size,
+					st->bucket+1)) {
 		struct sock *sk;
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux