Socket notifications.
This patch includes socket send/recv/accept notifications.
Using trivial web server based on kevent and this features
instead of epoll it's performance increased more than noticebly.
More details about various benchmarks and server itself
(evserver_kevent.c) can be found on project's homepage.
Signed-off-by: Evgeniy Polyakov <[email protected]>
diff --git a/fs/inode.c b/fs/inode.c
index bf21dc6..82817b1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
+#include <linux/kevent.h>
#include <linux/mount.h>
/*
@@ -164,12 +165,18 @@ static struct inode *alloc_inode(struct super_block *sb)
}
inode->i_private = NULL;
inode->i_mapping = mapping;
+#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE
+ kevent_storage_init(inode, &inode->st);
+#endif
}
return inode;
}
void destroy_inode(struct inode *inode)
{
+#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE
+ kevent_storage_fini(&inode->st);
+#endif
BUG_ON(inode_has_buffers(inode));
security_inode_free(inode);
if (inode->i_sb->s_op->destroy_inode)
diff --git a/include/net/sock.h b/include/net/sock.h
index 03684e7..d840399 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -49,6 +49,7 @@
#include <linux/skbuff.h> /* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>
+#include <linux/kevent.h>
#include <linux/filter.h>
@@ -451,6 +452,21 @@ static inline int sk_stream_memory_free(struct sock *sk)
extern void sk_stream_rfree(struct sk_buff *skb);
+struct socket_alloc {
+ struct socket socket;
+ struct inode vfs_inode;
+};
+
+static inline struct socket *SOCKET_I(struct inode *inode)
+{
+ return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
+}
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+ return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb->sk = sk;
@@ -478,6 +494,7 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
sk->sk_backlog.tail = skb;
}
skb->next = NULL;
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
}
#define sk_wait_event(__sk, __timeo, __condition) \
@@ -679,21 +696,6 @@ static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si)
return si->kiocb;
}
-struct socket_alloc {
- struct socket socket;
- struct inode vfs_inode;
-};
-
-static inline struct socket *SOCKET_I(struct inode *inode)
-{
- return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
-}
-
-static inline struct inode *SOCK_INODE(struct socket *socket)
-{
- return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
-}
-
extern void __sk_stream_mem_reclaim(struct sock *sk);
extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b7d8317..2763b30 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -864,6 +864,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
wake_up_interruptible(sk->sk_sleep);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
if (!inet_csk_ack_scheduled(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
(3 * TCP_RTO_MIN) / 4,
diff --git a/kernel/kevent/kevent_socket.c b/kernel/kevent/kevent_socket.c
new file mode 100644
index 0000000..d1a2701
--- /dev/null
+++ b/kernel/kevent/kevent_socket.c
@@ -0,0 +1,144 @@
+/*
+ * kevent_socket.c
+ *
+ * 2006 Copyright (c) Evgeniy Polyakov <[email protected]>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/file.h>
+#include <linux/tcp.h>
+#include <linux/kevent.h>
+
+#include <net/sock.h>
+#include <net/request_sock.h>
+#include <net/inet_connection_sock.h>
+
+static int kevent_socket_callback(struct kevent *k)
+{
+ struct inode *inode = k->st->origin;
+ unsigned int events = SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, SOCKET_I(inode), NULL);
+
+ if ((events & (POLLIN | POLLRDNORM)) && (k->event.event & (KEVENT_SOCKET_RECV | KEVENT_SOCKET_ACCEPT)))
+ return 1;
+ if ((events & (POLLOUT | POLLWRNORM)) && (k->event.event & KEVENT_SOCKET_SEND))
+ return 1;
+ if (events & (POLLERR | POLLHUP | POLLRDHUP | POLLREMOVE))
+ return -1;
+ return 0;
+}
+
+int kevent_socket_enqueue(struct kevent *k)
+{
+ struct inode *inode;
+ struct socket *sock;
+ int err = -EBADF;
+
+ sock = sockfd_lookup(k->event.id.raw[0], &err);
+ if (!sock)
+ goto err_out_exit;
+
+ inode = igrab(SOCK_INODE(sock));
+ if (!inode)
+ goto err_out_fput;
+
+ err = kevent_storage_enqueue(&inode->st, k);
+ if (err)
+ goto err_out_iput;
+
+ if (k->event.req_flags & KEVENT_REQ_ALWAYS_QUEUE) {
+ kevent_requeue(k);
+ err = 0;
+ } else {
+ err = k->callbacks.callback(k);
+ if (err)
+ goto err_out_dequeue;
+ }
+
+ return err;
+
+err_out_dequeue:
+ kevent_storage_dequeue(k->st, k);
+err_out_iput:
+ iput(inode);
+err_out_fput:
+ sockfd_put(sock);
+err_out_exit:
+ return err;
+}
+
+int kevent_socket_dequeue(struct kevent *k)
+{
+ struct inode *inode = k->st->origin;
+ struct socket *sock;
+
+ kevent_storage_dequeue(k->st, k);
+
+ sock = SOCKET_I(inode);
+ iput(inode);
+ sockfd_put(sock);
+
+ return 0;
+}
+
+void kevent_socket_notify(struct sock *sk, u32 event)
+{
+ if (sk->sk_socket)
+ kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, event);
+}
+
+/*
+ * It is required for network protocols compiled as modules, like IPv6.
+ */
+EXPORT_SYMBOL_GPL(kevent_socket_notify);
+
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key kevent_sock_key;
+
+void kevent_socket_reinit(struct socket *sock)
+{
+ struct inode *inode = SOCK_INODE(sock);
+
+ lockdep_set_class(&inode->st.lock, &kevent_sock_key);
+}
+
+void kevent_sk_reinit(struct sock *sk)
+{
+ if (sk->sk_socket) {
+ struct inode *inode = SOCK_INODE(sk->sk_socket);
+
+ lockdep_set_class(&inode->st.lock, &kevent_sock_key);
+ }
+}
+#endif
+static int __init kevent_init_socket(void)
+{
+ struct kevent_callbacks sc = {
+ .callback = &kevent_socket_callback,
+ .enqueue = &kevent_socket_enqueue,
+ .dequeue = &kevent_socket_dequeue,
+ .flags = 0,
+ };
+
+ return kevent_add_callbacks(&sc, KEVENT_SOCKET);
+}
+module_init(kevent_init_socket);
diff --git a/net/core/sock.c b/net/core/sock.c
index 0ed5b4f..e687f54 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1393,6 +1393,7 @@ static void sock_def_wakeup(struct sock *sk)
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible_all(sk->sk_sleep);
read_unlock(&sk->sk_callback_lock);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
}
static void sock_def_error_report(struct sock *sk)
@@ -1402,6 +1403,7 @@ static void sock_def_error_report(struct sock *sk)
wake_up_interruptible(sk->sk_sleep);
sk_wake_async(sk,0,POLL_ERR);
read_unlock(&sk->sk_callback_lock);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
}
static void sock_def_readable(struct sock *sk, int len)
@@ -1411,6 +1413,7 @@ static void sock_def_readable(struct sock *sk, int len)
wake_up_interruptible(sk->sk_sleep);
sk_wake_async(sk,1,POLL_IN);
read_unlock(&sk->sk_callback_lock);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
}
static void sock_def_write_space(struct sock *sk)
@@ -1430,6 +1433,7 @@ static void sock_def_write_space(struct sock *sk)
}
read_unlock(&sk->sk_callback_lock);
+ kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
}
static void sock_def_destruct(struct sock *sk)
@@ -1480,6 +1484,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_state = TCP_CLOSE;
sk->sk_socket = sock;
+ kevent_sk_reinit(sk);
+
sock_set_flag(sk, SOCK_ZAPPED);
if(sock)
@@ -1546,8 +1552,10 @@ void fastcall release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
sk->sk_lock.owner = NULL;
- if (waitqueue_active(&sk->sk_lock.wq))
+ if (waitqueue_active(&sk->sk_lock.wq)) {
wake_up(&sk->sk_lock.wq);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
+ }
spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
diff --git a/net/core/stream.c b/net/core/stream.c
index d1d7dec..2878c2a 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock *sk)
wake_up_interruptible(sk->sk_sleep);
if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, 2, POLL_OUT);
+ kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c701f6a..84ce4c5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3127,6 +3127,7 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if(skb->h.th->fin)
tcp_fin(skb, sk, skb->h.th);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bf7a224..bca9f2a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -61,6 +61,7 @@
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
+#include <linux/kevent.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
@@ -1391,6 +1392,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
reqsk_free(req);
} else {
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT);
}
return 0;
diff --git a/net/socket.c b/net/socket.c
index 4e39631..776dc2e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -84,6 +84,7 @@
#include <linux/kmod.h>
#include <linux/audit.h>
#include <linux/wireless.h>
+#include <linux/kevent.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -496,6 +497,8 @@ static struct socket *sock_alloc(void)
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
+ kevent_socket_reinit(sock);
+
get_cpu_var(sockets_in_use)++;
put_cpu_var(sockets_in_use);
return sock;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2f208c7..835e20f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1563,8 +1563,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
struct scm_cookie tmp_scm;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
+ struct sock *other;
int noblock = flags & MSG_DONTWAIT;
struct sk_buff *skb;
+
int err;
err = -EOPNOTSUPP;
@@ -1580,6 +1582,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
wake_up_interruptible(&u->peer_wait);
+ other =unix_peer_get(sk);
+ if (other) {
+ kevent_socket_notify(other, KEVENT_SOCKET_SEND);
+ sock_put(other);
+ } else
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
@@ -1674,7 +1682,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
{
struct sock_iocb *siocb = kiocb_to_siocb(iocb);
struct scm_cookie tmp_scm;
- struct sock *sk = sock->sk;
+ struct sock *sk = sock->sk, *other;
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr=msg->msg_name;
int copied = 0;
@@ -1803,6 +1811,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
}
} while (size);
+ other =unix_peer_get(sk);
+ if (other) {
+ kevent_socket_notify(other, KEVENT_SOCKET_SEND);
+ sock_put(other);
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN || !other)
+ kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
+
mutex_unlock(&u->readlock);
scm_recv(sock, msg, siocb->scm, flags);
out:
@@ -1824,6 +1840,10 @@ static int unix_shutdown(struct socket *sock, int mode)
sock_hold(other);
unix_state_wunlock(sk);
sk->sk_state_change(sk);
+ kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
+ if (other)
+ kevent_socket_notify(other, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
+ kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
if (other &&
(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]