Some comments below...
On Tuesday 23 August 2005 15:37, Amy Griffis wrote:
 diff -r 8ecff93e704a -r 58e1301e9661 fs/inotify.c
 --- a/fs/inotify.c      Thu Aug 18 19:53:59 2005
 +++ b/fs/inotify.c      Thu Aug 18 23:19:52 2005
 @@ -83,14 +83,18 @@
         wait_queue_head_t       wq;             /* wait queue for i/o */
         struct idr              idr;            /* idr mapping wd -> watch */
         struct semaphore        sem;            /* protects this bad boy */
 -       struct list_head        events;         /* list of queued events */
         struct list_head        watches;        /* list of watches */
         atomic_t                count;          /* reference count */
 +       u32                     last_wd;        /* the last wd allocated */
 +       /* userland consumer API */
 +       struct list_head        events;         /* list of queued events */
         struct user_struct      *user;          /* user who opened this dev */
         unsigned int            queue_size;     /* size of the queue (bytes) */
         unsigned int            event_count;    /* number of pending events */
         unsigned int            max_events;     /* maximum number of events */
 -       u32                     last_wd;        /* the last wd allocated */
 +       /* kernel consumer API */
 +       void (*callback)(struct inotify_event *, const char *,
 +                        void *);               /* event callback */
  };
  
  /*
 @@ -122,6 +126,7 @@
         struct inode            *inode; /* associated inode */
         s32                     wd;     /* watch descriptor */
         u32                     mask;   /* event mask for this watch */
 +       void            *callback_arg;  /* callback argument - kernel API */
  };
  
  #ifdef CONFIG_SYSCTL
 @@ -173,8 +178,10 @@
  static inline void put_inotify_dev(struct inotify_device *dev)
  {
         if (atomic_dec_and_test(&dev->count)) {
 -               atomic_dec(&dev->user->inotify_devs);
 -               free_uid(dev->user);
 +               if (dev->user) {
 +                       atomic_dec(&dev->user->inotify_devs);
 +                       free_uid(dev->user);
 +               }
                 kfree(dev);
         }
  }
 @@ -341,6 +348,23 @@
  }
  
  /*
 + * inotify_callback_event - notify kernel consumers of events
 + */
 +static void inotify_callback_event(struct inotify_device *dev, 
 +                                 struct inotify_watch *watch, 
 +                                 u32 mask, u32 cookie, const char *name)
 +{
 +       struct inotify_event event;
 +
 +       event.wd = watch->wd;
 +       event.mask = mask;
 +       event.cookie = cookie;
 +       event.len = 0; /* kernel consumers don't need length */
 +
 +       dev->callback(&event, name, watch->callback_arg);
 +}
 +
 +/*
   * inotify_dev_get_wd - returns the next WD for use by the given dev
   *
   * Callers must hold dev->sem.  This function can sleep.
 @@ -383,12 +407,13 @@
   * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
   */
  static struct inotify_watch *create_watch(struct inotify_device *dev,
 -                                         u32 mask, struct inode *inode)
 +                                         u32 mask, struct inode *inode,
 +                                         void *callback_arg)
  {
         struct inotify_watch *watch;
         int ret;
  
 -       if (atomic_read(&dev->user->inotify_watches) >=
 +       if (dev->user && atomic_read(&dev->user->inotify_watches)
>=
                         inotify_max_user_watches)
                 return ERR_PTR(-ENOSPC);
  
 @@ -404,6 +429,7 @@
  
         dev->last_wd = watch->wd;
         watch->mask = mask;
 +       watch->callback_arg = callback_arg;
         atomic_set(&watch->count, 0);
         INIT_LIST_HEAD(&watch->d_list);
         INIT_LIST_HEAD(&watch->i_list);
 @@ -421,7 +447,8 @@
         /* bump our own count, corresponding to our entry in dev->watches */
         get_inotify_watch(watch);
  
 -       atomic_inc(&dev->user->inotify_watches);
 +       if (dev->user)
 +               atomic_inc(&dev->user->inotify_watches);
  
         return watch;
  }
 @@ -453,7 +480,8 @@
         list_del(&watch->i_list);
         list_del(&watch->d_list);
  
 -       atomic_dec(&dev->user->inotify_watches);
 +       if (dev->user)
 +               atomic_dec(&dev->user->inotify_watches);
         idr_remove(&dev->idr, watch->wd);
         put_inotify_watch(watch);
  }
 @@ -471,7 +499,10 @@
   */
  static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
  {
 -       inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
 +       if (dev->callback)
 +               inotify_callback_event(dev, watch, IN_IGNORED, 0, NULL);
 +       else
 +               inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
         remove_watch_no_event(watch, dev);
  }
  
 @@ -484,7 +515,172 @@
         return !list_empty(&inode->inotify_watches);
  }
  
 -/* Kernel API */
 +/* Kernel consumer API */
 +
 +/*
 + * inotify_init - allocates and initializes an inotify device.
 + */
 +struct inotify_device * 
 +inotify_init(void (*callback)(struct inotify_event *, const char *, void *))
 +{
 +       struct inotify_device *dev;
 +
 +       dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
 +       if (unlikely(!dev))
 +               return NULL;
 +
 +       idr_init(&dev->idr);
 +       INIT_LIST_HEAD(&dev->events);
 +       INIT_LIST_HEAD(&dev->watches);
 +       init_waitqueue_head(&dev->wq);
 +       sema_init(&dev->sem, 1);
 +       dev->event_count = 0;
 +       dev->queue_size = 0;
 +       dev->max_events = inotify_max_queued_events;
 +       dev->user = NULL;       /* set in sys_inotify_init */
 +       dev->last_wd = 0;
 +       dev->callback = callback;
 +       atomic_set(&dev->count, 0);
 +       get_inotify_dev(dev);
 +
 +       return dev;
 +}
 +EXPORT_SYMBOL_GPL(inotify_init);
 +
 +/*
 + * inotify_free - clean up and free an inotify device.
 + */
 +int inotify_free(struct inotify_device *dev)
 +{
 +       /*
 +        * Destroy all of the watches on this device.  Unfortunately, not very
 +        * pretty.  We cannot do a simple iteration over the list, because we
 +        * do not know the inode until we iterate to the watch.  But we need to
 +        * hold inode->inotify_sem before dev->sem.  The following works.
 +        */
 +       while (1) {
 +               struct inotify_watch *watch;
 +               struct list_head *watches;
 +               struct inode *inode;
 +
 +               down(&dev->sem);
 +               watches = &dev->watches;
 +               if (list_empty(watches)) {
 +                       up(&dev->sem);
 +                       break;
 +               }
 +               watch = list_entry(watches->next, struct inotify_watch, d_list);
 +               get_inotify_watch(watch);
 +               up(&dev->sem);
 +
 +               inode = watch->inode;
 +               down(&inode->inotify_sem);
 +               down(&dev->sem);
 +               remove_watch_no_event(watch, dev);
 +               up(&dev->sem);
 +               up(&inode->inotify_sem);
 +               put_inotify_watch(watch);
 +       }
 +
 +       /* destroy all of the events on this device */
 +       down(&dev->sem);
 +       while (!list_empty(&dev->events))
 +               inotify_dev_event_dequeue(dev);
 +       up(&dev->sem);
 +
 +       /* free this device: the put matching the get in inotify_init() */
 +       put_inotify_dev(dev);
 +
 +       return 0;
 +}
 +EXPORT_SYMBOL_GPL(inotify_free);
 +
 +/*
 + * inotify_add_watch - add a watch to this inotify instance.
 + */
 +s32 inotify_add_watch(struct inotify_device *dev, struct inode *inode, 
 +                     u32 mask, void *callback_arg)
 +{
 +       struct inotify_watch *watch, *old;
 +       int ret;
 +
 +       down(&inode->inotify_sem); 
What happens if the inode is deleted when attempting to hold down the semaphore?
Perhaps a comment reminding the caller of this function they must hold the proper
inode locks around this function?.
 +       down(&dev->sem);
 +
 +       /* don't let consumers set invalid bits: we don't want flags set */
 +       mask &= IN_ALL_EVENTS;
 +       if (unlikely(!mask)) {
 +               ret = -EINVAL;
 +               goto out;
 +       }
 +
 +       /*
 +        * Handle the case of re-adding a watch on an (inode,dev) pair that we
 +        * are already watching.  We just update the mask and callback_arg and
 +        * return its wd.
 +        */
 +       old = inode_find_dev(inode, dev);
 +       if (unlikely(old)) {
 +               old->mask = mask;
 +               old->callback_arg = callback_arg; 
You can leak memory here, right?  Maybe there needs to be a old->callback_free
field that, when set, knows how to clean up old->callback_arg, before pointing it
else where.  Not sure we can assume that the callback_arg is always going to be
statically allocated.
 +               ret = old->wd;
 +               goto out;
 +       }
 +
 +       watch = create_watch(dev, mask, inode, callback_arg);
 +       if (unlikely(IS_ERR(watch))) {
 +               ret = PTR_ERR(watch);
 +               goto out;
 +       }
 +
 +       /* Add the watch to the device's and the inode's list */
 +       list_add(&watch->d_list, &dev->watches);
 +       list_add(&watch->i_list, &inode->inotify_watches);
 +       ret = watch->wd;
 +out:
 +       up(&dev->sem);
 +       up(&inode->inotify_sem);
 +       return ret;
 +}
 +EXPORT_SYMBOL_GPL(inotify_add_watch);
 +
 +/*
 + * inotify_ignore - remove a given wd from this inotify instance.
 + *
 + * Can sleep.
 + */
 +int inotify_ignore(struct inotify_device *dev, s32 wd)
 +{
 +       struct inotify_watch *watch;
 +       struct inode *inode;
 +
 +       down(&dev->sem);
 +       watch = idr_find(&dev->idr, wd);
 +       if (unlikely(!watch)) {
 +               up(&dev->sem);
 +               return -EINVAL;
 +       }
 +       get_inotify_watch(watch);
 +       inode = watch->inode;
 +       up(&dev->sem);
 +
 +       down(&inode->inotify_sem);
 +       down(&dev->sem);
 +
 +       /* make sure that we did not race */
 +       watch = idr_find(&dev->idr, wd);
 +       if (likely(watch))
 +               remove_watch(watch, dev);
 +
 +       up(&dev->sem);
 +       up(&inode->inotify_sem);
 +       put_inotify_watch(watch);
 +
 +       return 0;
 +}
 +EXPORT_SYMBOL_GPL(inotify_ignore);
 +
 +/* Kernel producer API */
  
  /**
   * inotify_inode_queue_event - queue an event to all watches on this inode 
Since the purpose of this patch is to add a kernel API to Inotify, I think it should
be as clean as possible.   With this in mind, I think that since we _might_ be calling
inotify_callback_event that the calling function not be inotify_inode_queue_event;
it's not all that intuitive.  Instead, I think this function should be renamed to 
something like, inotify_inode_handle_event...
 @@ -508,7 +704,12 @@
                         struct inotify_device *dev = watch->dev;
                         get_inotify_watch(watch);
                         down(&dev->sem);
 -                       inotify_dev_queue_event(dev, watch, mask, cookie, name);
 +                       if (dev->callback)
 +                               inotify_callback_event(dev, watch, mask, 
 +                                                      cookie, name);
 +                       else
 +                               inotify_dev_queue_event(dev, watch, mask,
 +                                                       cookie, name);
                         if (watch_mask & IN_ONESHOT)
                                 remove_watch_no_event(watch, dev);
                         up(&dev->sem);
 @@ -622,7 +823,12 @@
                 list_for_each_entry_safe(watch, next_w, watches, i_list) {
                         struct inotify_device *dev = watch->dev;
                         down(&dev->sem);
 -                       inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
 +                       if (dev->callback)
 +                               inotify_callback_event(dev, watch, IN_UNMOUNT, 
 +                                                      0, NULL);
 +                       else
 +                               inotify_dev_queue_event(dev, watch, IN_UNMOUNT,
 +                                                       0, NULL);
                         remove_watch(watch, dev);
                         up(&dev->sem);
                 }
 @@ -748,83 +954,7 @@
  
  static int inotify_release(struct inode *ignored, struct file *file)
  {
 -       struct inotify_device *dev = file->private_data;
 -
 -       /*
 -        * Destroy all of the watches on this device.  Unfortunately, not very
 -        * pretty.  We cannot do a simple iteration over the list, because we
 -        * do not know the inode until we iterate to the watch.  But we need to
 -        * hold inode->inotify_sem before dev->sem.  The following works.
 -        */
 -       while (1) {
 -               struct inotify_watch *watch;
 -               struct list_head *watches;
 -               struct inode *inode;
 -
 -               down(&dev->sem);
 -               watches = &dev->watches;
 -               if (list_empty(watches)) {
 -                       up(&dev->sem);
 -                       break;
 -               }
 -               watch = list_entry(watches->next, struct inotify_watch, d_list);
 -               get_inotify_watch(watch);
 -               up(&dev->sem);
 -
 -               inode = watch->inode;
 -               down(&inode->inotify_sem);
 -               down(&dev->sem);
 -               remove_watch_no_event(watch, dev);
 -               up(&dev->sem);
 -               up(&inode->inotify_sem);
 -               put_inotify_watch(watch);
 -       }
 -
 -       /* destroy all of the events on this device */
 -       down(&dev->sem);
 -       while (!list_empty(&dev->events))
 -               inotify_dev_event_dequeue(dev);
 -       up(&dev->sem);
 -
 -       /* free this device: the put matching the get in inotify_init() */
 -       put_inotify_dev(dev);
 -
 -       return 0;
 -}
 -
 -/*
 - * inotify_ignore - remove a given wd from this inotify instance.
 - *
 - * Can sleep.
 - */
 -static int inotify_ignore(struct inotify_device *dev, s32 wd)
 -{
 -       struct inotify_watch *watch;
 -       struct inode *inode;
 -
 -       down(&dev->sem);
 -       watch = idr_find(&dev->idr, wd);
 -       if (unlikely(!watch)) {
 -               up(&dev->sem);
 -               return -EINVAL;
 -       }
 -       get_inotify_watch(watch);
 -       inode = watch->inode;
 -       up(&dev->sem);
 -
 -       down(&inode->inotify_sem);
 -       down(&dev->sem);
 -
 -       /* make sure that we did not race */
 -       watch = idr_find(&dev->idr, wd);
 -       if (likely(watch))
 -               remove_watch(watch, dev);
 -
 -       up(&dev->sem);
 -       up(&inode->inotify_sem);
 -       put_inotify_watch(watch);
 -
 -       return 0;
 +       return inotify_free(file->private_data);
  }
  
  static long inotify_ioctl(struct file *file, unsigned int cmd,
 @@ -878,11 +1008,14 @@
                 goto out_free_uid;
         }
  
 -       dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
 +       dev = inotify_init(NULL);
         if (unlikely(!dev)) {
                 ret = -ENOMEM;
                 goto out_free_uid;
         }
 +
 +       dev->user = user;
 +       atomic_inc(&user->inotify_devs);
  
         filp->f_op = &inotify_fops;
         filp->f_vfsmnt = mntget(inotify_mnt);
 @@ -892,20 +1025,6 @@
         filp->f_flags = O_RDONLY;
         filp->private_data = dev;
  
 -       idr_init(&dev->idr);
 -       INIT_LIST_HEAD(&dev->events);
 -       INIT_LIST_HEAD(&dev->watches);
 -       init_waitqueue_head(&dev->wq);
 -       sema_init(&dev->sem, 1);
 -       dev->event_count = 0;
 -       dev->queue_size = 0;
 -       dev->max_events = inotify_max_queued_events;
 -       dev->user = user;
 -       dev->last_wd = 0;
 -       atomic_set(&dev->count, 0);
 -
 -       get_inotify_dev(dev);
 -       atomic_inc(&user->inotify_devs);
         fd_install(fd, filp);
  
         return fd;
 @@ -919,7 +1038,6 @@
  
  asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
  {
 -       struct inotify_watch *watch, *old;
         struct inode *inode;
         struct inotify_device *dev;
         struct nameidata nd;
 @@ -938,46 +1056,14 @@
  
         ret = find_inode(path, &nd);
         if (unlikely(ret))
 -               goto fput_and_out;
 +               goto out;
  
         /* inode held in place by reference to nd; dev by fget on fd */
         inode = nd.dentry->d_inode;
         dev = filp->private_data;
  
 -       down(&inode->inotify_sem);
 -       down(&dev->sem);
 -
 -       /* don't let user-space set invalid bits: we don't want flags set */
 -       mask &= IN_ALL_EVENTS;
 -       if (unlikely(!mask)) {
 -               ret = -EINVAL;
 -               goto out;
 -       }
 -
 -       /*
 -        * Handle the case of re-adding a watch on an (inode,dev) pair that we
 -        * are already watching.  We just update the mask and return its wd.
 -        */
 -       old = inode_find_dev(inode, dev);
 -       if (unlikely(old)) {
 -               old->mask = mask;
 -               ret = old->wd;
 -               goto out;
 -       }
 -
 -       watch = create_watch(dev, mask, inode);
 -       if (unlikely(IS_ERR(watch))) {
 -               ret = PTR_ERR(watch);
 -               goto out;
 -       }
 -
 -       /* Add the watch to the device's and the inode's list */
 -       list_add(&watch->d_list, &dev->watches);
 -       list_add(&watch->i_list, &inode->inotify_watches);
 -       ret = watch->wd;
 +       ret = inotify_add_watch(dev, inode, mask, NULL);
  out:
 -       up(&dev->sem);
 -       up(&inode->inotify_sem);
         path_release(&nd);
  fput_and_out:
         fput_light(filp, fput_needed);
 diff -r 8ecff93e704a -r 58e1301e9661 include/linux/inotify.h
 --- a/include/linux/inotify.h   Thu Aug 18 19:53:59 2005
 +++ b/include/linux/inotify.h   Thu Aug 18 23:19:52 2005
 @@ -14,6 +14,9 @@
   *
   * When you are watching a directory, you will receive the filename for events
   * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd.
 + *
 + * When using inotify from the kernel, len will always be zero.  Instead you
 + * should check the path for non-NULL in your callback.
   */
  struct inotify_event {
         __s32           wd;             /* watch descriptor */
 @@ -68,6 +71,17 @@
  
  #ifdef CONFIG_INOTIFY
  
 +/* Kernel consumer API */
 +
 +extern struct inotify_device *inotify_init(void (*)(struct inotify_event *, 
 +                                                   const char *, void *));
 +extern int inotify_free(struct inotify_device *);
 +extern __s32 inotify_add_watch(struct inotify_device *, struct inode *, __u32,
 +                              void *);
 +extern int inotify_ignore(struct inotify_device *, __s32);
 +
 +/* Kernel producer API */
 +
  extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
                                       const char *);
  extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
 @@ -77,6 +91,8 @@
  extern u32 inotify_get_cookie(void);
  
  #else
 +
 +/* Kernel producer API stubs */
  
  static inline void inotify_inode_queue_event(struct inode *inode,
                                              __u32 mask, __u32 cookie,