On Thu, 2005-06-02 at 12:03 +0100, David Woodhouse wrote:
> Two things left that I can think of:
> * Getting rid of blanket allocations of audit_inode_data
I have this half-done, as you saw in the half-complete patch I threw
over the wall last night
(
http://david.woodhou.se/audit-abolish-wentry-4.patch)
Should hopefully have it working today and then will do an audit.53.
--- linux-2.6.9/include/linux/audit.h.p20057 2005-06-02 18:02:20.000000000 +0100
+++ linux-2.6.9/include/linux/audit.h 2005-06-02 18:02:20.000000000 +0100
@@ -225,10 +225,10 @@ struct audit_watch {
char *w_name; /* Watch point beneath parent */
char *w_path; /* Insertion path */
char *w_filterkey; /* An arbitrary filtering key */
- struct audit_inode_data *w_iaudit; /* Preallocated inode data */
};
struct audit_inode_data {
+ int count;
struct audit_inode_data *next_hash; /* Watch data hash table */
struct inode *inode; /* Inode to which it belongs */
struct audit_watch *watch; /* Watch for this inode */
@@ -299,7 +299,6 @@ extern int audit_filesystem_init(void);
extern int audit_list_watches(int pid, int seq);
extern int audit_receive_watch(int type, int pid, int uid, int seq,
struct watch_transport *req, uid_t loginuid);
-extern int audit_inode_alloc(struct inode *inode);
extern void audit_inode_free(struct inode *inode);
extern void audit_update_watch(struct dentry *dentry, int remove);
extern void audit_watch_put(struct audit_watch *watch);
@@ -312,7 +311,6 @@ extern int auditfs_attach_wdata(struct i
#define audit_filesystem_init() ({ 0; })
#define audit_list_watches(p,s) ({ -EOPNOTSUPP; })
#define audit_receive_watch(t,p,u,s,r,l) ({ -EOPNOTSUPP; })
-#define audit_inode_alloc(i) ({ 0; })
#define audit_inode_free(i) do { ; } while(0)
#define audit_update_watch(d,r) do { ; } while (0)
#define audit_watch_put(w) do { ; } while(0)
--- linux-2.6.9/fs/inode.c.p20057 2005-06-02 18:02:20.000000000 +0100
+++ linux-2.6.9/fs/inode.c 2005-06-02 18:02:20.000000000 +0100
@@ -137,8 +137,7 @@ static struct inode *alloc_inode(struct
inode->i_rdev = 0;
inode->i_security = NULL;
inode->dirtied_when = 0;
- if (audit_inode_alloc(inode) || security_inode_alloc(inode)) {
- audit_inode_free(inode);
+ if (security_inode_alloc(inode)) {
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
--- linux-2.6.9/kernel/auditfs.c.p20057 2005-06-02 18:02:20.000000000 +0100
+++ linux-2.6.9/kernel/auditfs.c 2005-06-02 18:02:20.000000000 +0100
@@ -36,6 +36,14 @@
#include <linux/module.h>
#include <asm/uaccess.h>
+#if 1
+#define dprintk(...) do { } while(0)
+#define __print_symbol(x, y) do { } while(0)
+#else
+#define dprintk(...) printk(KERN_DEBUG __VA_ARGS__);
+extern void __print_symbol(char *, void *);
+#define inline
+#endif
extern int audit_enabled;
@@ -52,6 +60,9 @@ struct audit_skb_list {
};
+static int audit_nr_watches;
+static int audit_pool_size;
+static struct audit_inode_data *audit_data_pool;
static struct audit_inode_data **auditfs_hash_table;
static spinlock_t auditfs_hash_lock = SPIN_LOCK_UNLOCKED;
static int auditfs_hash_bits;
@@ -59,32 +70,73 @@ static int auditfs_cache_buckets = 16384
module_param(auditfs_cache_buckets, int, 0);
MODULE_PARM_DESC(auditfs_cache_buckets, "Number of auditfs cache entries to allocate
(default 16384)\n");
-struct audit_inode_data *inode_audit_data(struct inode *inode)
+static int audit_data_pool_grow(void)
+{
+ struct audit_inode_data *new;
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ new->next_hash = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new->next_hash) {
+ kfree(new);
+ return -ENOMEM;
+ }
+
+ spin_lock(&auditfs_hash_lock);
+ new->next_hash->next_hash = audit_data_pool;
+ audit_data_pool = new;
+ audit_nr_watches++;
+ audit_pool_size += 2;
+ spin_unlock(&auditfs_hash_lock);
+ return 0;
+}
+static void audit_data_pool_shrink(void)
+{
+ spin_lock(&auditfs_hash_lock);
+ audit_nr_watches--;
+ spin_unlock(&auditfs_hash_lock);
+}
+
+static struct audit_inode_data *audit_data_get(struct inode *inode, int allocate)
{
struct audit_inode_data **list;
- int h = hash_ptr(inode, auditfs_hash_bits);
struct audit_inode_data *ret = NULL;
+ int h = hash_ptr(inode, auditfs_hash_bits);
list = &auditfs_hash_table[h];
spin_lock(&auditfs_hash_lock);
- while (*list && (unsigned long)((*list)->inode) < (unsigned long)inode)
+ while (*list && (unsigned long)((*list)->inode) < (unsigned long)inode) {
+ dprintk("list %p -> %p\n", list, *list);
list = &(*list)->next_hash;
-
+ }
if (*list && (*list)->inode == inode)
ret = *list;
- if (!ret) {
- /* Hm. At the moment, we should never see an inode which doesn't have audit data
*/
- printk("Hm. No audit data for inode %p. Hash bucket for hash %d
follows...\n", inode, h);
- list = &auditfs_hash_table[h];
- while (*list) {
- printk("ino=%p data=%p\n", (*list)->inode, *list);
- list = &(*list)->next_hash;
- }
+ if (ret) {
+ ret->count++;
+ } else if (allocate) {
+ ret = audit_data_pool;
+ audit_data_pool = ret->next_hash;
+ audit_pool_size--;
+ dprintk("allocate from pool. %d left\n", audit_pool_size);
+
+ INIT_HLIST_HEAD(&ret->watchlist);
+ ret->watch = NULL;
+ ret->lock = RW_LOCK_UNLOCKED;
+ ret->inode = inode;
+ ret->next_hash = *list;
+ ret->count = 2;
+ *list = ret;
+ }
+ if (ret) {
+ dprintk("Got audit data %p for inode %p (%lu), count++ now %d. From %p: ",
+ ret, ret->inode, ret->inode->i_ino, ret->count,
__builtin_return_address(0));
+ __print_symbol("%s\n", __builtin_return_address(0));
}
-
spin_unlock(&auditfs_hash_lock);
+
return ret;
}
@@ -108,7 +160,7 @@ static inline void audit_pin(struct audi
}
static inline struct audit_watch *audit_watch_fetch(const char *name,
- struct audit_inode_data *data)
+ struct audit_inode_data *data)
{
struct audit_watch *watch, *ret = NULL;
struct hlist_node *pos;
@@ -123,7 +175,7 @@ static inline struct audit_watch *audit_
}
static inline struct audit_watch *audit_watch_fetch_lock(const char *name,
- struct audit_inode_data *data)
+ struct audit_inode_data *data)
{
struct audit_watch *ret = NULL;
@@ -163,11 +215,6 @@ static inline void audit_watch_free(stru
BUG_ON(watch->w_dentry);
BUG_ON(!hlist_unhashed(&watch->w_node));
BUG_ON(!hlist_unhashed(&watch->w_master));
- if (watch->w_iaudit) {
- BUG_ON(watch->w_iaudit->inode);
- BUG_ON(!hlist_empty(&watch->w_iaudit->watchlist));
- kfree(watch->w_iaudit);
- }
kmem_cache_free(audit_watch_cache, watch);
}
}
@@ -248,7 +295,7 @@ audit_to_transport_exit:
* Because we're protected by audit_netlink_sem, there will never be a race on
* current insertions of the same watch. However, we do lock both the master
* watchlist and the local watchlist to ensure that insertions and removals of
- * watches are seralized.
+ * watches are serialized.
*/
static inline int audit_create_watch(const char *path,
const char *name,
@@ -336,83 +383,111 @@ static inline void audit_destroy_watch(s
}
}
-
-/*
- * We only drain the watchlist in two ways. They are:
- *
- * 1) Upon deletion of the inode (no contention)
- * 2) Upon rename()'ing a directory with an "active" watchlist
(contention)
- *
- * Caller should hold data->lock where contention is possible.
- */
-static inline void audit_drain_watchlist(struct audit_inode_data *data)
+static void audit_data_put(struct audit_inode_data *data)
{
- struct audit_watch *watch;
- struct hlist_node *pos, *tmp;
+ if (!data)
+ return;
- hlist_for_each_entry_safe(watch, pos, tmp, &data->watchlist, w_node)
- audit_destroy_watch(watch);
-}
+ spin_lock(&auditfs_hash_lock);
+ data->count--;
+ dprintk("Put audit_data %p for inode %p (%lu), count-- now %d. From %p:",
data,
+ data->inode, data->inode?data->inode->i_ino:0, data->count,
__builtin_return_address(0));
+ __print_symbol("%s\n", __builtin_return_address(0));
+
+ if (data->count == 1 && data->inode &&
+ !data->watch && hlist_empty(&data->watchlist)) {
+ dprintk("Last put.\n");
+ data->count--;
+ }
+
+ if (!data->count) {
+ /* We are last user. Remove it from the hash table to
+ disassociate it from its inode */
+ struct audit_watch *watch;
+ struct hlist_node *pos, *tmp;
+ int h = hash_ptr(data->inode, auditfs_hash_bits);
+ struct audit_inode_data **list = &auditfs_hash_table[h];
-static inline void audit_data_free(struct audit_inode_data *data)
-{
- struct audit_inode_data *wdata = NULL;
+ while (*list && (unsigned long)((*list)->inode) < (unsigned
long)data->inode)
+ list = &(*list)->next_hash;
- audit_drain_watchlist(data);
+ BUG_ON(*list != data);
+ *list = data->next_hash;
+ data->inode = NULL;
- /* If this inode was watched itself, we may have been using
- the preallocated inode audit data. So don't free it */
- if (data->watch) {
- wdata = data->watch->w_iaudit;
- audit_watch_put(data->watch);
+ spin_unlock(&auditfs_hash_lock);
+
+ /* Drain watchlist */
+ hlist_for_each_entry_safe(watch, pos, tmp, &data->watchlist, w_node) {
+ audit_destroy_watch(watch);
+ audit_data_pool_shrink();
+ }
+ spin_lock(&auditfs_hash_lock);
+
+ /* Check whether to free it or return it to the pool */
+ if (audit_nr_watches > audit_pool_size) {
+ dprintk("Back to pool. %d watches, %d in pool\n", audit_nr_watches,
audit_pool_size);
+ data->next_hash = audit_data_pool;
+ audit_data_pool = data;
+ audit_pool_size++;
+ } else {
+ dprintk("Freed. %d watches, %d in pool\n", audit_nr_watches,
audit_pool_size);
+ kfree(data);
+ }
}
- if (data != wdata)
- kfree(data);
- else
- memset(data, 0, sizeof(*data));
+
+ spin_unlock(&auditfs_hash_lock);
}
static inline int audit_insert_watch(struct audit_watch *watch, uid_t loginuid)
{
int ret;
struct nameidata nd;
+ struct audit_inode_data *pdata;
+
+ /* Grow the pool by two -- one for the watch itself, and
+ one for the parent directory */
+ if (audit_data_pool_grow())
+ return -ENOMEM;
ret = path_lookup(watch->w_path, LOOKUP_PARENT, &nd);
if (ret < 0)
- goto audit_insert_watch_exit;
-
- /* FIXME: Allocate w_iaudit */
+ goto out;
ret = -EPERM;
if (nd.last_type != LAST_NORM || !nd.last.name)
- goto audit_insert_watch_release;
+ goto release;
+
+ ret = -ENOMEM;
+ pdata = audit_data_get(nd.dentry->d_inode, 1);
+ if (!pdata)
+ goto put_pdata;
ret = audit_create_watch(watch->w_path,
nd.last.name,
watch->w_filterkey,
watch->w_perms,
nd.dentry->d_inode->i_sb->s_dev,
- inode_audit_data(nd.dentry->d_inode));
+ pdata);
if (ret < 0)
- goto audit_insert_watch_release;
+ goto put_pdata;
audit_log(NULL, AUDIT_CONFIG_CHANGE, "auid=%u inserted watch\n", loginuid);
/* __d_lookup will attach the audit data, if nd.last exists. */
dput(d_lookup(nd.dentry, &nd.last));
-audit_insert_watch_release:
+ put_pdata:
+ audit_data_put(pdata);
+ release:
path_release(&nd);
-audit_insert_watch_exit:
+ out:
+ if (ret)
+ audit_data_pool_shrink();
+
return ret;
}
-/*
- * We hold the data->lock here to eliminate contention between this user space
- * request and an action taken by audit_update_watch() in which watch removal
- * is also possible when a directory with an "active" watchlist is
rename()'ed.
- *
- */
static inline int audit_remove_watch(struct audit_watch *watch, uid_t loginuid)
{
int ret;
@@ -428,7 +503,9 @@ static inline int audit_remove_watch(str
if (nd.last_type != LAST_NORM || !nd.last.name)
goto audit_remove_watch_release;
- data = inode_audit_data(nd.dentry->d_inode);
+ data = audit_data_get(nd.dentry->d_inode, 0);
+ if (!data)
+ goto audit_remove_watch_release;
write_lock(&data->lock);
real = audit_watch_fetch(nd.last.name, data);
@@ -440,7 +517,8 @@ static inline int audit_remove_watch(str
audit_watch_put(real);
audit_log(NULL, AUDIT_CONFIG_CHANGE, "auid=%u removed watch\n", loginuid);
write_unlock(&data->lock);
-
+ audit_data_put(data);
+ audit_data_pool_shrink();
ret = 0;
audit_remove_watch_release:
@@ -480,7 +558,7 @@ void audit_watch_put(struct audit_watch
*/
void audit_update_watch(struct dentry *dentry, int remove)
{
- struct audit_watch *watch;
+ struct audit_watch *watch = NULL;
struct audit_inode_data *data, *parent;
if (likely(!audit_enabled))
@@ -492,23 +570,25 @@ void audit_update_watch(struct dentry *d
if (!dentry->d_parent || !dentry->d_parent->d_inode)
return;
- data = inode_audit_data(dentry->d_inode);
- if (!data) {
- printk(KERN_WARNING "Hmmm. No audit data for inode #%lu at %p. name %s\n",
- dentry->d_inode->i_ino, dentry->d_inode, dentry->d_name.name);
+ /* If there's no audit data on the parent inode, then there can
+ be no watches to add or remove */
+ parent = audit_data_get(dentry->d_parent->d_inode, 0);
+ if (!parent)
return;
- }
- parent = inode_audit_data(dentry->d_parent->d_inode);
watch = audit_watch_fetch_lock(dentry->d_name.name, parent);
+ /* Fetch audit data, using the preallocated one from the watch if
+ there is actually a relevant watch and the inode didn't already
+ have any audit data */
+ data = audit_data_get(dentry->d_inode, !!watch);
+
+ /* If there's no data, then there wasn't a watch either.
+ Nothing to see here; move along */
+ if (!data)
+ goto put_watch;
+
write_lock(&data->lock);
- /* FIXME: long watchlist == too much spinning? */
- if (remove == 1) {
- /* Inode actually being removed. If it was a directory, drain
- its watchlist too. */
- audit_drain_watchlist(data);
- }
if (remove) {
/* If the inode was being watched for _this_ pathname, clear the watch */
/* FIXME: If there was _another_ path to the same inode which was
@@ -528,9 +608,12 @@ void audit_update_watch(struct dentry *d
data->watch = audit_watch_get(watch);
audit_pin(data->watch, dentry);
}
+ /* FIXME: If inode's i_audit is no longer used, clear it. */
write_unlock(&data->lock);
-
+ audit_data_put(data);
+ put_watch:
audit_watch_put(watch);
+ audit_data_put(parent);
}
/* Convert a watch to a audit_skb_list */
@@ -678,58 +761,16 @@ audit_receive_watch_exit:
return ret;
}
-int audit_inode_alloc(struct inode *inode)
-{
- struct audit_inode_data *data;
- struct audit_inode_data **list;
- int h = hash_ptr(inode, auditfs_hash_bits);
-
- data = kmalloc(sizeof(struct audit_inode_data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- INIT_HLIST_HEAD(&data->watchlist);
- data->watch = NULL;
- data->lock = RW_LOCK_UNLOCKED;
- data->inode = inode;
- data->next_hash = NULL;
-
- /* Add it to the hash table */
- list = &auditfs_hash_table[h];
- spin_lock(&auditfs_hash_lock);
-
- while (*list && (unsigned long)((*list)->inode) < (unsigned long)inode)
- list = &(*list)->next_hash;
-
- data->next_hash = *list;
- *list = data;
-
- spin_unlock(&auditfs_hash_lock);
- return 0;
-}
-
void audit_inode_free(struct inode *inode)
{
- struct audit_inode_data *data = NULL;
- int h = hash_ptr(inode, auditfs_hash_bits);
- struct audit_inode_data **list;
+ struct audit_inode_data *data = audit_data_get(inode, 0);
- if (!inode)
- return;
-
- spin_lock(&auditfs_hash_lock);
- list = &auditfs_hash_table[h];
- while (*list && (unsigned long)((*list)->inode) < (unsigned long)inode)
- list = &(*list)->next_hash;
-
- if (*list && (*list)->inode == inode) {
- data = *list;
- *list = data->next_hash;
+ if (data) {
+ audit_drain_watchlist(data);
+ audit_watch_put(data->watch);
+ data->watch = NULL;
+ audit_data_put(data);
}
- spin_unlock(&auditfs_hash_lock);
-
- if (data)
- audit_data_free(data);
}
/*
@@ -744,14 +785,21 @@ void audit_dentry_unpin(struct dentry *d
struct audit_inode_data *parent;
struct audit_watch *watch;
- parent = inode_audit_data(dentry->d_parent->d_inode);
+ parent = audit_data_get(dentry->d_parent->d_inode, 0);
+ if (!parent)
+ return;
+
watch = audit_watch_fetch_lock(dentry->d_name.name, parent);
if (watch) {
- struct audit_inode_data *data = inode_audit_data(dentry->d_inode);
+ struct audit_inode_data *data = audit_data_get(dentry->d_inode, 0);
- audit_unpin(data->watch);
+ if (data) {
+ audit_unpin(data->watch);
+ audit_data_put(data);
+ }
audit_watch_put(watch);
}
+ audit_data_put(parent);
}
int audit_filesystem_init(void)
@@ -806,7 +854,7 @@ int audit_notify_watch(struct inode *ino
if (!inode || !current->audit_context)
return 0;
- data = inode_audit_data(inode);
+ data = audit_data_get(inode, 0);
if (!data)
return 0;
@@ -818,17 +866,18 @@ int audit_notify_watch(struct inode *ino
watch = audit_watch_get(data->watch);
read_unlock(&data->lock);
if (!watch)
- return 0;
+ goto out;
if (mask && (watch->w_perms && !(watch->w_perms & mask)))
- goto audit_notify_watch_fail;
+ goto fail;
ret = auditfs_attach_wdata(inode, watch, mask);
- if (!ret)
- return 0;
-
- audit_notify_watch_fail:
- audit_watch_put(watch);
+ if (ret) {
+ fail:
+ audit_watch_put(watch);
+ }
+ out:
+ audit_data_put(data);
return ret;
}
--
dwmw2