Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Aubrey Li <[email protected]> wrote:

> Here, in the attachment I wrote a small test app. Please correct if
> there is anything wrong, and feel free to improve it.

Okay... I have that working... probably.  I don't know what output it's
supposed to produce, but I see this:

	# /packet-mmap/sample_packet_mmap
	00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50-
	38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00-
	00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de-
	32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00-
	00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00-
	45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80-
	c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03-
	00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00-
	00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4-
	00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86-
	00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02-
	01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e-
	00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00-
	00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated]

Does that look reasonable?

I've attached the preliminary patch.  Note four things about it:

 (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but
     I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y
     could cause problems.

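 (2) There's a race between packet_get_unmapped_area() being called and
     packet_mmap() being called: the socket lock is dropped in between, so the
     ring may change.  packet_mmap() therefore rechecks the ring and fails with
     -EINVAL or -ESTALE rather than accepting a stale address.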

 (3) I've added an extra check into packet_set_ring() to make sure the caller
     isn't asking for a block size and block count whose product would exceed
     ULONG_MAX (-ENOMEM is returned if it would).  This protects a multiply
     done elsewhere.

 (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode.

David

---
[PATCH] NOMMU: Support mmap() on AF_PACKET sockets

From: David Howells <[email protected]>

Support mmap() on AF_PACKET sockets in NOMMU-mode kernels.

Signed-off-by: David Howells <[email protected]>
---

 include/linux/net.h    |    7 +++
 include/net/sock.h     |    8 +++
 net/core/sock.c        |   10 ++++
 net/packet/af_packet.c |  118 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/socket.c           |   77 +++++++++++++++++++++++++++++++
 5 files changed, 219 insertions(+), 1 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e6..9e77cf6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -161,6 +161,11 @@ struct proto_ops {
 	int		(*recvmsg)   (struct kiocb *iocb, struct socket *sock,
 				      struct msghdr *m, size_t total_len,
 				      int flags);
+#ifndef CONFIG_MMU
+	unsigned long	(*get_unmapped_area)(struct file *file, struct socket *sock,
+					     unsigned long addr, unsigned long len,
+					     unsigned long pgoff, unsigned long flags);
+#endif
 	int		(*mmap)	     (struct file *file, struct socket *sock,
 				      struct vm_area_struct * vma);
 	ssize_t		(*sendpage)  (struct socket *sock, struct page *page,
@@ -191,6 +196,8 @@ extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
 extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
 extern int 	     sock_map_fd(struct socket *sock);
+extern void	     sock_make_mappable(struct socket *sock,
+					unsigned long prot);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60c..d91edea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -841,6 +841,14 @@ extern int                      sock_no_sendmsg(struct kiocb *, struct socket *,
 						struct msghdr *, size_t);
 extern int                      sock_no_recvmsg(struct kiocb *, struct socket *,
 						struct msghdr *, size_t, int);
+#ifndef CONFIG_MMU
+extern unsigned long		sock_no_get_unmapped_area(struct file *,
+							  struct socket *,
+							  unsigned long,
+							  unsigned long,
+							  unsigned long,
+							  unsigned long);
+#endif
 extern int			sock_no_mmap(struct file *file,
 					     struct socket *sock,
 					     struct vm_area_struct *vma);
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62..b288799 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1364,6 +1364,15 @@ int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 	return -EOPNOTSUPP;
 }
 
+#ifndef CONFIG_MMU
+unsigned long sock_no_get_unmapped_area(struct file *file, struct socket *sock,
+					unsigned long addr, unsigned long len,
+					unsigned long pgoff, unsigned long flags)
+{
+	return (unsigned long) -ENODEV;
+}
+#endif
+
 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
 {
 	/* Mirror missing mmap method error code */
@@ -1943,6 +1952,7 @@ EXPORT_SYMBOL(sock_no_getname);
 EXPORT_SYMBOL(sock_no_getsockopt);
 EXPORT_SYMBOL(sock_no_ioctl);
 EXPORT_SYMBOL(sock_no_listen);
+EXPORT_SYMBOL(sock_no_get_unmapped_area);
 EXPORT_SYMBOL(sock_no_mmap);
 EXPORT_SYMBOL(sock_no_poll);
 EXPORT_SYMBOL(sock_no_recvmsg);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8..12d970a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -78,6 +78,7 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <asm/mman.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -1023,6 +1024,7 @@ static int packet_create(struct socket *sock, int protocol)
 		sock->ops = &packet_ops_spkt;
 #endif
 	sock_init_data(sock, sk);
+	sock_make_mappable(sock, PROT_READ | PROT_WRITE);
 
 	po = pkt_sk(sk);
 	sk->sk_family = PF_PACKET;
@@ -1629,6 +1631,7 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
 	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
 }
 
+#ifdef CONFIG_MMU
 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
 	int i;
@@ -1671,6 +1674,45 @@ out_free_pgvec:
 	goto out;
 }
 
+#else
+/*
+ * free the buffer pointer block and all the buffers for NOMMU
+ */
+static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
+{
+	kfree(pg_vec[0]);
+	kfree(pg_vec);
+}
+
+/*
+ * allocate the buffer pointer block and all the buffer blocks for the mappable
+ * AF_PACKET buffers for NOMMU mode
+ * - in NOMMU mode the buffers must be contiguous with each other
+ */
+static char **alloc_pg_vec(struct tpacket_req *req, int order)
+{
+	char *buffers;
+	char **ptrblock;
+	int loop;
+
+	buffers = kcalloc(req->tp_block_size, req->tp_block_nr, GFP_KERNEL);
+	if (!buffers)
+		return ERR_PTR(-ENOMEM);
+	ptrblock = kmalloc(sizeof(char *) * req->tp_block_nr, GFP_KERNEL);
+	if (!ptrblock) {
+		kfree(buffers);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (loop = 0; loop < req->tp_block_nr; loop++) {
+		ptrblock[loop] = buffers;
+		buffers += req->tp_block_size;
+	}
+
+	return ptrblock;
+}
+#endif
+
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
 {
 	char **pg_vec = NULL;
@@ -1695,6 +1737,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 			return -EINVAL;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
+		if (unlikely(ULONG_MAX / req->tp_block_size < req->tp_block_nr))
+			return -ENOMEM;
 
 		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
 		if (unlikely(po->frames_per_block <= 0))
@@ -1783,6 +1827,7 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_MMU
 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
 {
 	struct sock *sk = sock->sk;
@@ -1823,7 +1868,72 @@ out:
 	release_sock(sk);
 	return err;
 }
-#endif
+
+#else
+/*
+ * find out where the socket buffers are so that NOMMU mmap can return the
+ * address directly
+ */
+static unsigned long packet_get_unmapped_area(struct file *file,
+					      struct socket *sock,
+					      unsigned long addr,
+					      unsigned long len,
+					      unsigned long pgoff,
+					      unsigned long flags)
+{
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+
+	/* check that they want to map the whole buffer */
+	if (pgoff)
+		return (unsigned long) -EINVAL;
+
+	lock_sock(sk);
+	if (!po->pg_vec ||
+	    len != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
+		addr = (unsigned long) -EINVAL;
+	else
+		addr = (unsigned long) po->pg_vec[0];
+	release_sock(sk);
+	return addr;
+}
+
+/*
+ * set the mapping of a packet socket buffer for NOMMU mmap
+ */
+static int packet_mmap(struct file *file, struct socket *sock,
+		       struct vm_area_struct *vma)
+{
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+	unsigned long len;
+	int ret;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	/* check that they want to map the whole buffer */
+	len = vma->vm_end - vma->vm_start;
+	if (!po->pg_vec ||
+	    len != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE) {
+		ret = -EINVAL;
+	} else if (vma->vm_start != (unsigned long) po->pg_vec[0]) {
+		/* someone found the gap between get_unmapped_area and mmap */
+		ret = -ESTALE;
+	} else {
+		atomic_inc(&po->mapped);
+		vma->vm_ops = &packet_mmap_ops;
+		ret = 0;
+	}
+
+	release_sock(sk);
+	return ret;
+}
+
+#endif /* CONFIG_MMU */
+#endif /* CONFIG_PACKET_MMAP */
 
 
 #ifdef CONFIG_SOCK_PACKET
@@ -1844,6 +1954,9 @@ static const struct proto_ops packet_ops_spkt = {
 	.getsockopt =	sock_no_getsockopt,
 	.sendmsg =	packet_sendmsg_spkt,
 	.recvmsg =	packet_recvmsg,
+#ifndef CONFIG_MMU
+	.get_unmapped_area = sock_no_get_unmapped_area,
+#endif
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
 };
@@ -1866,6 +1979,9 @@ static const struct proto_ops packet_ops = {
 	.getsockopt =	packet_getsockopt,
 	.sendmsg =	packet_sendmsg,
 	.recvmsg =	packet_recvmsg,
+#ifndef CONFIG_MMU
+	.get_unmapped_area = packet_get_unmapped_area,
+#endif
 	.mmap =		packet_mmap,
 	.sendpage =	sock_no_sendpage,
 };
diff --git a/net/socket.c b/net/socket.c
index ea8f81a..4109db6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -87,6 +87,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include <asm/mman.h>
 
 #include <net/compat.h>
 
@@ -98,6 +99,13 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			 unsigned long nr_segs, loff_t pos);
 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			  unsigned long nr_segs, loff_t pos);
+#ifndef CONFIG_MMU
+static unsigned long sock_get_unmapped_area(struct file *file,
+					    unsigned long addr,
+					    unsigned long len,
+					    unsigned long pgoff,
+					    unsigned long flags);
+#endif
 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 
 static int sock_close(struct inode *inode, struct file *file);
@@ -127,6 +135,9 @@ static const struct file_operations socket_file_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_sock_ioctl,
 #endif
+#ifndef CONFIG_MMU
+	.get_unmapped_area = sock_get_unmapped_area,
+#endif
 	.mmap =		sock_mmap,
 	.open =		sock_no_open,	/* special open code to disallow open via /proc */
 	.release =	sock_close,
@@ -136,6 +147,26 @@ static const struct file_operations socket_file_ops = {
 };
 
 /*
+ * capabilities for non-mappable sockets
+ * - does not permit mmap at all
+ * - no readahead or I/O queue unplugging required
+ */
+struct backing_dev_info non_mappable_socket_bdi = {
+	.capabilities	= 0,
+};
+
+/*
+ * capabilities for read/write mappable sockets
+ * - permits shared-mmap for read and write only
+ * - does not permit private mmap
+ * - no readahead or I/O queue unplugging required
+ */
+struct backing_dev_info rw_mappable_socket_bdi = {
+	.capabilities	= (BDI_CAP_MAP_DIRECT | BDI_CAP_READ_MAP |
+			   BDI_CAP_WRITE_MAP),
+};
+
+/*
  *	The protocol list. Each protocol is registered in here.
  */
 
@@ -415,6 +446,34 @@ static struct socket *sock_from_file(struct file *file, int *err)
 }
 
 /**
+ *	sock_make_mappable -	Change the mmappability of a socket
+ *	@sock: The socket to alter
+ *	@prot: The mmap protection to permit
+ *
+ *	The nominated socket has its backing device information selection
+ *	changed to indicate to mmap() what sort of mappings are available on
+ *	this socket.
+ */
+void sock_make_mappable(struct socket *sock, unsigned long prot)
+{
+	struct address_space *mapping = &SOCK_INODE(sock)->i_data;
+
+	switch (prot) {
+	case 0:
+		mapping->backing_dev_info = &non_mappable_socket_bdi;
+		break;
+	case PROT_READ | PROT_WRITE:
+		mapping->backing_dev_info = &rw_mappable_socket_bdi;
+		break;
+	case PROT_READ:
+	case PROT_WRITE:
+	default:
+		BUG();
+		break;
+	}
+}
+
+/**
  *	sockfd_lookup	- 	Go from a file number to its socket slot
  *	@fd: file handle
  *	@err: pointer to an error code return
@@ -482,6 +541,7 @@ static struct socket *sock_alloc(void)
 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
+	inode->i_data.backing_dev_info = &non_mappable_socket_bdi;
 
 	get_cpu_var(sockets_in_use)++;
 	put_cpu_var(sockets_in_use);
@@ -918,6 +978,22 @@ static unsigned int sock_poll(struct file *file, poll_table *wait)
 	return sock->ops->poll(file, sock, wait);
 }
 
+#ifndef CONFIG_MMU
+static unsigned long sock_get_unmapped_area(struct file *file,
+					    unsigned long addr,
+					    unsigned long len,
+					    unsigned long pgoff,
+					    unsigned long flags)
+{
+	struct socket *sock = file->private_data;
+
+	if (!sock->ops->get_unmapped_area)
+		return -ENOSYS;
+	return sock->ops->get_unmapped_area(file, sock,
+					    addr, len, pgoff, flags);
+}
+#endif
+
 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct socket *sock = file->private_data;
@@ -2288,6 +2364,7 @@ EXPORT_SYMBOL(sock_create);
 EXPORT_SYMBOL(sock_create_kern);
 EXPORT_SYMBOL(sock_create_lite);
 EXPORT_SYMBOL(sock_map_fd);
+EXPORT_SYMBOL(sock_make_mappable);
 EXPORT_SYMBOL(sock_recvmsg);
 EXPORT_SYMBOL(sock_register);
 EXPORT_SYMBOL(sock_release);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux