Matt Helsley wrote:Thanks for Matt's careful review. I'll follow your advices to modify it and new version will be released soon.
On Wed, 2006-03-22 at 22:58 +0800, Yi Yang wrote:This patch implements a new connector, Filesystem Event Connector, the user can monitor filesystem activities via it, currently, it can monitor access, attribute change, open, create, modify, delete, move and close of any file or directory. Every filesystem event will include tgid, uid and gid of the processwhich triggered this event, process name, file or directory name operated by it.Filesystem events connector is never a duplicate of inotify, inotify just concerns change on file or directory, Beagle uses it to watch file changes in order to regenerate index for it, inotify can't tell us who did that change and what is its process name, but filesystem events connector can do these, moreover inotify's overhead is greaterthan filesystem events connector, inotify needs compare inode with watched file or directories list to decide whether it should generate an inotify_event, some locks also increase overhead, filesystem event connector hasn't these overhead, it just generates a fsevent and send.To be important, filesystem event connector doesn't add any new system call, the user space application can make use of it by netlink socket, but inotify added several system calls, many events mechanism in kernel have used netlink as communication way with user space, for example, KOBJECT_UEVENT, PROC_EVENTS, to use netlink will make it more possible to unify events interface to netlink, the user space application can use it very easy.Signed-off-by: Yi Yang <[email protected]> --- a/include/linux/connector.h.orig 2006-03-15 23:21:37.000000000 +0800 +++ b/include/linux/connector.h 2006-03-15 23:23:09.000000000 +0800 @@ -34,6 +34,8 @@ #define CN_VAL_PROC 0x1 #define CN_IDX_CIFS 0x2 #define CN_VAL_CIFS 0x1 +#define CN_IDX_FS 0x3 +#define CN_VAL_FS 0x1#define CN_NETLINK_USERS 1 --- a/include/linux/fsnotify.h.orig 2006-01-03 11:21:10.000000000 +0800+++ b/include/linux/fsnotify.h 2006-03-22 21:48:24.000000000 +0800 @@ -15,6 +15,7 @@#include <linux/dnotify.h>#include <linux/inotify.h> +#include <linux/fsevent.h>/** fsnotify_move - file old_name at old_dir was moved to new_name at new_dir@@ -45,6 +46,8 @@ static inline void fsnotify_move(struct if (source) {inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL); } + raise_fsevent_move(old_dir, old_name, new_dir, new_name, + FSEVENT_MOVE | (isdir?FSEVENT_ISDIR:0)); }/*@@ -56,6 +59,8 @@ static inline void fsnotify_nameremove(s isdir = IN_ISDIR; dnotify_parent(dentry, DN_DELETE); inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); + raise_fsevent(dentry, + FSEVENT_DELETE | (isdir?FSEVENT_ISDIR:0)); }/*@@ -74,6 +79,7 @@ static inline void fsnotify_create(struc { inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, name); + raise_fsevent_create(inode, name, FSEVENT_CREATE); }/*@@ -83,6 +89,8 @@ static inline void fsnotify_mkdir(struct { inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, name); + raise_fsevent_create(inode, name, + FSEVENT_CREATE | FSEVENT_ISDIR); }/*@@ -99,6 +107,8 @@ static inline void fsnotify_access(struc dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL); + raise_fsevent(dentry, FSEVENT_ACCESS | + ((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0)); }/*@@ -115,6 +125,8 @@ static inline void fsnotify_modify(struc dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL); + raise_fsevent(dentry, FSEVENT_MODIFY | + ((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0)); }/* @@ -130,6 +142,9 @@ static inline void fsnotify_open(struct inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);inotify_inode_queue_event(inode, mask, 0, NULL); + raise_fsevent(dentry, FSEVENT_OPEN | + ((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0)); + }/*@@ -148,6 +163,8 @@ static inline void fsnotify_close(structinotify_dentry_parent_queue_event(dentry, mask, 0, name);inotify_inode_queue_event(inode, mask, 0, NULL); + raise_fsevent(dentry, FSEVENT_CLOSE | + ((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0)); }/*@@ -163,6 +180,8 @@ static inline void fsnotify_xattr(structinotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);inotify_inode_queue_event(inode, mask, 0, NULL); + raise_fsevent(dentry, FSEVENT_MODIFY_ATTRIB | + ((S_ISDIR(inode->i_mode))?FSEVENT_ISDIR:0)); }/*@@ -213,6 +232,24 @@ static inline void fsnotify_change(struc inotify_dentry_parent_queue_event(dentry, in_mask, 0, dentry->d_name.name); } + +#ifdef CONFIG_FS_EVENTS + { + u32 fsevent_mask = 0; + if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) + fsevent_mask |= FSEVENT_MODIFY_ATTRIB; + if ((ia_valid & ATTR_ATIME) && (ia_valid & ATTR_MTIME)) + fsevent_mask |= FSEVENT_MODIFY_ATTRIB; + else if (ia_valid & ATTR_ATIME) + fsevent_mask |= FSEVENT_ACCESS; + else if (ia_valid & ATTR_MTIME) + fsevent_mask |= FSEVENT_MODIFY; + if (ia_valid & ATTR_SIZE) + fsevent_mask |= FSEVENT_MODIFY; + if (fsevent_mask) + raise_fsevent(dentry, fsevent_mask); + } +#endif /* CONFIG_FS_EVENTS */ }#ifdef CONFIG_INOTIFY /* inotify helpers */--- a/drivers/connector/Makefile.orig 2006-03-12 21:23:18.000000000 +0800 +++ b/drivers/connector/Makefile 2006-03-12 21:24:25.000000000 +0800 @@ -1,4 +1,5 @@ obj-$(CONFIG_CONNECTOR) += cn.o obj-$(CONFIG_PROC_EVENTS) += cn_proc.o +obj-$(CONFIG_FS_EVENTS) += cn_fs.ocn-y += cn_queue.o connector.o--- a/drivers/connector/Kconfig.orig 2006-03-12 21:23:53.000000000 +0800 +++ b/drivers/connector/Kconfig 2006-03-12 21:21:44.000000000 +0800 @@ -18,4 +18,12 @@ config PROC_EVENTS Provide a connector that reports process events to userspace. Send events such as fork, exec, id change (uid, gid, suid, etc), and exit.+config FS_EVENTS+ boolean "Report filesystem events to userspace" + depends on CONNECTOR=y + default y + ---help--- + Provide a connector that reports filesystem events to userspace. The + reported event include open, read, write. + endmenu --- /dev/null 2003-01-30 18:24:37.000000000 +0800 +++ b/include/linux/fsevent.h 2006-03-16 22:52:06.000000000 +0800 @@ -0,0 +1,86 @@ +/* + * fsevent.h - filesystem events connector + * + * Copyright (C) 2006 Yi Yang <[email protected]> + * Based on cn_proc.h by Matt Helsley, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LINUX_FSEVENT_H +#define LINUX_FSEVENT_H + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/connector.h> + +enum fsevent_type { + FSEVENT_ACCESS = 0x00000001, /* File was accessed */ + FSEVENT_MODIFY = 0x00000002, /* File was modified */ + FSEVENT_MODIFY_ATTRIB = 0x00000004, /* Metadata changed */ + FSEVENT_CLOSE = 0x00000008, /* File was closed */ + FSEVENT_OPEN = 0x00000010, /* File was opened */ + FSEVENT_MOVE = 0x00000020, /* File was moved */ + FSEVENT_CREATE = 0x00000040, /* File was created */ + FSEVENT_DELETE = 0x00000080, /* File was deleted */ + FSEVENT_CMDACK = 0x40000000, /* For used internally */s/used/use/+ FSEVENT_ISDIR = 0x80000000 /* It is set for a dir */ +};So we'd have DN_FOO, IN_FOO, and FSEVENT_FOO? Any chance inotify definitions could be used instead of redefining all of these? Perhaps that would simplify the code in fsnotify.h. The only problem I can see is the FSEVENT_CMDACK type. This could be avoided by breaking up the type field into an inotify-compatible mask field and the type field (EVENT or CMDACK).
This advice is very good, I'll use this kind of way.
+/* + * Userspace sends this enum to register with the kernel that it is listening + * for events on the connector. + */ +enum fsevent_mode { + FSEVENT_LISTEN = 1, + FSEVENT_IGNORE = 2 +}; +Process Events Connector uses this mechanism to avoid most of the eventgeneration code if there are no listeners.Michael Kerrisk has privately suggested to me that this mechanism gives userspace too much rope with which to hang itself. I think it just gives userspace more rope. That said, perhaps we can shorten the rope by adding a connector function to quickly return a value indicating if a process in userspace is listening to messages sent by the kernel. Then connectors could use that function rather that reinvent the same mechanism.
After Evgeniy did it, I'll follow his way.
I ever used pid_t, but in the user space, it has problem, size of pid_t in kernel space different from in user space,+struct fsevent { + __u32 type; + __u32 cpu; + struct timespec timestamp; + __u32 tgid;I think pid_t would be more appropriate here instead of a __u32. Also a tid field of type pid_t would be consistent with the Process Events returned to userspace. If a userspace program ever wanted to relate the two sets of events the tid field could be important. That said it would be nice to know if any userspace programs are planning on using this.
I don't know why, do you have an idea about it?
+ __u32 uid; + __u32 gid; + __u32 err;The err field appears to be unused when sending events back to userspace. I suggest reusing it for the event mask and using the type field to indicate whether the message is a CMDACK or simply an EVENT.
This is a good idea, I'll do so.
If some modules want to use it to transfer file system events reliably, the return value will be very valuble,+ __u32 len; + __u32 pname_len; + __u32 fname_len; + __u32 new_fname_len;I think one of these _len fields could be dropped and later calculated as: new_fname_len = len - (pname_len + fname_len);+ char name[0]; +}; + +#ifdef __KERNEL__ +#ifdef CONFIG_FS_EVENTS +extern void raise_fsevent(struct dentry * dentryp, u32 mask);+extern void raise_fsevent_move(struct inode * olddir, const char * oldname, + struct inode * newdir, const char * newname, u32 mask); +extern void raise_fsevent_create(struct inode * inode, + const char * name, u32 mask);+#else +static void raise_fsevent(struct dentry * dentryp, u32 mask); +{} ++static void raise_fsevent_move(struct inode * olddir, const char * oldname, + struct inode * newdir, const char * newname, u32 mask)+{} ++static void raise_fsevent_create(struct inode * inode, + const char * name, u32 mask)+{} +#endif /* CONFIG_FS_EVENTS */ +#endif /* __KERNEL__ */ +#endif /* LINUX_FSEVENT_H */ --- /dev/null 2003-01-30 18:24:37.000000000 +0800 +++ b/drivers/connector/cn_fs.c 2006-03-16 22:54:28.000000000 +0800 @@ -0,0 +1,200 @@ +/* + * cn_fs.c - filesystem events connector + * + * Copyright (C) 2006 Yi Yang <[email protected]> + * Based on cn_proc.c by Matt Helsley, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/fsevent.h> +#include <asm/atomic.h> + +#define CN_FS_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct fsevent)) + +static atomic_t cn_fs_event_listeners = ATOMIC_INIT(0); +static struct cb_id cn_fs_event_id = { CN_IDX_FS, CN_VAL_FS }; + +static DEFINE_PER_CPU(__u32, cn_fs_event_counts) = { 0 }; +static int fsevent_burst_limit = 100; +static int fsevent_ratelimit = 5 * HZ; + +static inline void get_seq(__u32 *ts, int *cpu) +{ + *ts = get_cpu_var(cn_fs_event_counts)++; + *cpu = smp_processor_id(); + put_cpu_var(cn_fs_event_counts); +} + +int __raise_fsevent(const char * oldname, const char * newname, u32 mask)The return value of this function does not appear to be used.
because the caller can retry the transfer until it successes.
+{ + struct cn_msg *msg; + struct fsevent *event; + __u8 * buffer;While the contents of buffer are being sent to userspace eventually, the pointer is not, so I don't think __u8 is necessary or conventional.
I agree.
+ int namelen = 0; + static unsigned long last = 0; + static int fsevent_sum = 0;Yuck, static local variables. IMHO these should be globals. It would make the fact they aren't protected from concurrent access more obvious (see below).
Yes, they should be global.
+ if (atomic_read(&cn_fs_event_listeners) < 1) + return 0; + + if (jiffies - last <= fsevent_ratelimit) { + if (fsevent_sum > fsevent_burst_limit) { + return -1;OK, so you're rate limiting the events. Shouldn't you still boost the sequence number so that userspace knows some events got dropped? Also perhaps you can find an appropriate error to return instead of -1.
Good idea.
This just is used to limit event rate when the user space application leads to an unlimited events loop. so it mustn't be precise, I used spinlock originally, but Andrew thinks lock overhead is big, inotify has led to some frustrating lock overhead, so I decide to remove it, in fact fsevent_sum just is the number used to limit rate,+ }remove unecessary braces+ fsevent_sum++;Looks racy to me -- what's protecting fsevent_sum from access by multiple cpus?
some race conditions don't effect the rate limit.
+ } + else {I seem to recall that coding style would have you move the else up.+ last = jiffies; + fsevent_sum = 0; + } + + namelen = strlen(current->comm) + strlen(oldname) + 2; + if (newname) { + namelen += strlen(newname) + 1; + }remove braces+ if ((buffer = (__u8 *)kmalloc(CN_FS_MSG_SIZE + namelen, GFP_KERNEL)) + == NULL) {Pull the assignment out of the condition. You're not saving any space by putting it into the if () and it's harder to read. I don't think the __u8 cast is necessary..
I'll do so.
+ printk("cn_fs: out of memory\n");missing printk tag+ return -1;Perhaps: return -ENOMEM;+ } + + msg = (struct cn_msg*)buffer; + event = (struct fsevent*)msg->data; + get_seq(&msg->seq, &event->cpu); + ktime_get_ts(&event->timestamp); + event->type = mask; + event->tgid = current->tgid; + event->uid = current->uid; + event->gid = current->gid; + event->pname_len = strlen(current->comm); + memcpy(event->name, current->comm, event->pname_len); + event->name[event->pname_len] = '\0'; + event->fname_len = strlen(oldname); + memcpy(event->name + event->pname_len + 1, oldname, strlen(oldname)); + event->len = event->pname_len + event->fname_len + 2; + event->name[event->len-1] = '\0'; + event->new_fname_len = 0; + if (newname) { + event->new_fname_len = strlen(newname); + memcpy(event->name + event->len, newname, strlen(newname)); + event->len += event->new_fname_len + 1; + event->name[event->len-1] = '\0'; + }Maybe it's just my personal taste, but you could use pointer arithmetic while copying the strings into the event buffer: name = event->name; strncpy(name, current->comm, event->pname_len); name += event->pname_len; name[0] = '\0'; name++; strncpy(name, oldname, event->fname_len); name += event->fname_len; name[0] = '\0'; name++; ... Then you could break it out into a small function that appends the string to the buffer for you and call it for each string: static void append_string(char **dest, const char *src, size_t len) { strncpy(*dest, src, len); *dest += len; (*dest)[0] = '\0'; (*dest)++;}... int __raise_fsevent(const char * oldname, const char * newname, u32 mask) { ... name = event->name; append_string(&name, current->comm, event->pname_len); event->fname_len = strlen(oldname); append_string(&name, oldname, event->fname_len); ... }+ memcpy(&msg->id, &cn_fs_event_id, sizeof(msg->id)); + msg->ack = 0; + msg->len = sizeof(struct fsevent) + event->len; + cn_netlink_send(msg, CN_IDX_FS, GFP_KERNEL); + kfree(buffer); + return 0; +} + +void raise_fsevent(struct dentry * dentryp, u32 mask) +{ + __raise_fsevent(dentryp->d_name.name, NULL, mask); +} +EXPORT_SYMBOL_GPL(raise_fsevent); + +void raise_fsevent_create(struct inode * inode, const char * name, u32 mask) +{ + __raise_fsevent(name, NULL, mask); +} +EXPORT_SYMBOL_GPL(raise_fsevent_create); ++void raise_fsevent_move(struct inode * olddir, const char * oldname, + struct inode * newdir, const char * newname, u32 mask)+{ + __raise_fsevent(oldname, newname, mask); +} +EXPORT_SYMBOL_GPL(raise_fsevent_move); + +static void cn_fs_event_ack(int err, int rcvd_seq, int rcvd_ack) +{ + struct cn_msg *msg; + struct fsevent *event; + __u8 * buffer = NULL; + + if (atomic_read(&cn_fs_event_listeners) < 1) + return; + + if ((buffer = (__u8 *)kmalloc(CN_FS_MSG_SIZE, GFP_KERNEL)) == NULL) {I don't think the __u8 cast is necessary+ printk("cn_fs: out of memory\n");missing printk tag+ return; + } + + msg = (struct cn_msg*)buffer; + event = (struct fsevent*)msg->data; + msg->seq = rcvd_seq; + ktime_get_ts(&event->timestamp); + event->cpu = -1; + event->type = FSEVENT_CMDACK; + event->tgid = 0; + event->uid = 0; + event->gid = 0; + event->len = 0; + event->pname_len = 0; + event->fname_len = 0; + event->new_fname_len = 0; + event->err = err; + memcpy(&msg->id, &cn_fs_event_id, sizeof(msg->id)); + msg->ack = rcvd_ack + 1; + msg->len = sizeof(struct fsevent); + cn_netlink_send(msg, CN_IDX_FS, GFP_KERNEL); +} + +static void cn_fsevent_mode_ctl(void *data) +{ + struct cn_msg *msg = data; + enum fsevent_mode *mode = NULL; + int err = 0; + + if (msg->len != sizeof(*mode)) + return; + + mode = (enum fsevent_mode*)msg->data; + switch (*mode) { + case FSEVENT_LISTEN: + atomic_inc(&cn_fs_event_listeners); + break; + case FSEVENT_IGNORE: + atomic_dec(&cn_fs_event_listeners); + break; + default: + err = EINVAL; + break; + } + cn_fs_event_ack(err, msg->seq, msg->ack); +} + +static int __init cn_fs_init(void) +{ + int err; + + if ((err = cn_add_callback(&cn_fs_event_id, "cn_fs", + &cn_fsevent_mode_ctl))) { + printk(KERN_WARNING "cn_fs failed to register\n"); + return err; + } + return 0; +} + +module_init(cn_fs_init);
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
- Follow-Ups:
- Re: [2.6.16 PATCH] Connector: Filesystem Events Connector
- From: Matt Helsley <[email protected]>
- Re: [2.6.16 PATCH] Connector: Filesystem Events Connector
- References:
- [2.6.16 PATCH] Connector: Filesystem Events Connector
- From: Yi Yang <[email protected]>
- Re: [2.6.16 PATCH] Connector: Filesystem Events Connector
- From: Matt Helsley <[email protected]>
- [2.6.16 PATCH] Connector: Filesystem Events Connector
- Prev by Date: Re: 2.6.16-rc5: known regressions [TP 600X S3, vanilla DSDT]
- Next by Date: [PATCH 2.6.16-mm1 2/3] rtc: m41t00 driver cleanup
- Previous by thread: Re: [2.6.16 PATCH] Connector: Filesystem Events Connector
- Next by thread: Re: [2.6.16 PATCH] Connector: Filesystem Events Connector
- Index(es):