The following patch improves audit syscall filtering performance when
the exit filter list contains many rules with an inode number field.
This implements Al Viro's idea from:
https://www.redhat.com/archives/linux-audit/2006-April/msg00087.html
Rules containing a single inode field are placed in one of 32 lists,
determined by hashing the inode number. Rules containing more than
one inode field, or using an operator other than equality, remain in
the exit filter list.
At syscall exit time, rules in the exit filter list are evaluated
first. The inode number hash is then checked for matching rules if
any audit_names[] have been collected during syscall processing.
Signed-off-by: Amy Griffis <amy.griffis(a)hp.com>
--
audit.c | 8 ++
audit.h | 11 +++
auditfilter.c | 168 +++++++++++++++++++++++++++++++++++++++++++++-------------
auditsc.c | 96 +++++++++++++++++++++++----------
4 files changed, 218 insertions(+), 65 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 7637410..40b3807 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -106,6 +106,9 @@ static struct sock *audit_sock;
/* Inotify handle. */
struct inotify_handle *audit_ih;
+/* Hash for inode-based rules */
+struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
/* The audit_freelist is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
* being placed on the freelist). */
@@ -669,6 +672,8 @@ static void audit_receive(struct sock *s
/* Initialize audit support at boot time. */
static int __init audit_init(void)
{
+ int i;
+
printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
@@ -692,6 +697,9 @@ #ifdef CONFIG_AUDITSYSCALL
audit_ih = inotify_init(audit_handle_ievent);
if (IS_ERR(audit_ih))
audit_panic("cannot initialize inotify handle");
+
+ for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
+ INIT_LIST_HEAD(&audit_inode_hash[i]);
#endif
return 0;
diff --git a/kernel/audit.h b/kernel/audit.h
index 771833d..507998a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -81,6 +81,7 @@ struct audit_krule {
u32 buflen; /* for data alloc on list rules */
u32 field_count;
struct audit_field *fields;
+ struct audit_field *inode_f; /* quick access to an inode field */
struct audit_watch *watch; /* associated watch */
struct list_head rlist; /* entry in audit_watch.rules list */
};
@@ -91,8 +92,16 @@ struct audit_entry {
struct audit_krule rule;
};
-
extern int audit_pid;
+
+#define AUDIT_INODE_BUCKETS 32
+extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
+static inline int audit_hash_ino(u32 ino)
+{
+ return (ino & (AUDIT_INODE_BUCKETS-1));
+}
+
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
extern int audit_compare_dname_path(const char *dname, const char *path);
extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4cc6406..e6c45fe 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -234,22 +234,40 @@ static char *audit_unpack_string(void **
return str;
}
+/* Translate an inode field to kernel respresentation. */
+static inline int audit_to_inode(struct audit_krule *krule,
+ struct audit_field *f)
+{
+ if (krule->listnr != AUDIT_FILTER_EXIT ||
+ krule->watch)
+ return -EINVAL;
+
+ /* if >1 inode field or op is not '=', rule goes on exit filter list,
+ * otherwise it goes in the inode hash table */
+ if (f->op & ~AUDIT_EQUAL ||
+ krule->inode_f)
+ krule->inode_f = NULL;
+ else
+ krule->inode_f = f;
+
+ return 0;
+}
+
/* Translate a watch string to kernel respresentation. */
static int audit_to_watch(struct audit_krule *krule, char *path, int len,
u32 op)
{
struct audit_watch *watch;
+ if (!audit_ih)
+ return -EOPNOTSUPP;
+
if (path[0] != '/' || path[len-1] == '/' ||
krule->listnr != AUDIT_FILTER_EXIT ||
op & ~AUDIT_EQUAL ||
- krule->watch) /* allow only 1 watch per rule */
+ krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */
return -EINVAL;
- /* ensure inotify handle was initialized */
- if (!audit_ih)
- return -EOPNOTSUPP;
-
watch = audit_init_watch(path);
if (unlikely(IS_ERR(watch)))
return PTR_ERR(watch);
@@ -325,15 +343,23 @@ static struct audit_entry *audit_rule_to
f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
f->val = rule->values[i];
- if (f->type & AUDIT_UNUSED_BITS ||
- f->type == AUDIT_SE_USER ||
- f->type == AUDIT_SE_ROLE ||
- f->type == AUDIT_SE_TYPE ||
- f->type == AUDIT_SE_SEN ||
- f->type == AUDIT_SE_CLR ||
- f->type == AUDIT_WATCH) {
- err = -EINVAL;
+ err = -EINVAL;
+ if (f->type & AUDIT_UNUSED_BITS)
+ goto exit_free;
+
+ switch(f->type) {
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ case AUDIT_WATCH:
goto exit_free;
+ case AUDIT_INODE:
+ err = audit_to_inode(&entry->rule, f);
+ if (err)
+ goto exit_free;
+ break;
}
entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
@@ -426,6 +452,11 @@ static struct audit_entry *audit_data_to
goto exit_free;
}
break;
+ case AUDIT_INODE:
+ err = audit_to_inode(&entry->rule, f);
+ if (err)
+ goto exit_free;
+ break;
}
}
@@ -645,6 +676,7 @@ static struct audit_entry *audit_dupe_ru
for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
new->mask[i] = old->mask[i];
new->buflen = old->buflen;
+ new->inode_f = old->inode_f;
new->watch = NULL;
new->field_count = old->field_count;
memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
@@ -700,18 +732,20 @@ static inline void audit_update_watch(st
nwatch->ino = ino;
list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
oentry = container_of(r, struct audit_entry, rule);
+ list_del(&oentry->rule.rlist);
+ list_del_rcu(&oentry->list);
nentry = audit_dupe_rule(&oentry->rule, nwatch);
- if (unlikely(IS_ERR(nentry))) {
+ if (unlikely(IS_ERR(nentry)))
audit_panic("error updating watch, removing");
- list_del(&oentry->rule.rlist);
- list_del_rcu(&oentry->list);
- } else {
+ else {
+ int h = audit_hash_ino((u32)ino);
list_add(&nentry->rule.rlist, &nwatch->rules);
- list_del(&oentry->rule.rlist);
- list_replace_rcu(&oentry->list, &nentry->list);
+ list_add_rcu(&nentry->list, &audit_inode_hash[h]);
}
+
call_rcu(&oentry->rcu, audit_free_rule_rcu);
}
@@ -950,10 +984,11 @@ static inline int audit_add_rule(struct
struct list_head *list)
{
struct audit_entry *e;
+ struct audit_field *inode_f = entry->rule.inode_f;
struct audit_watch *watch = entry->rule.watch;
struct nameidata *ndp, *ndw;
LIST_HEAD(inotify_list);
- int err, putnd_needed = 0;
+ int h, err, putnd_needed = 0;
/* Taking audit_filter_mutex protects from stale rule data. */
mutex_lock(&audit_filter_mutex);
@@ -980,6 +1015,11 @@ static inline int audit_add_rule(struct
err = audit_add_watch(&entry->rule, ndp, ndw, &inotify_list);
if (err)
goto error;
+ h = audit_hash_ino((u32)watch->ino);
+ list = &audit_inode_hash[h];
+ } else if (inode_f) {
+ h = audit_hash_ino(inode_f->val);
+ list = &audit_inode_hash[h];
}
if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
@@ -1031,38 +1071,66 @@ static inline void audit_remove_watch(st
}
}
-/* Remove an existing rule from filterlist. */
-static inline int audit_del_rule(struct audit_entry *entry,
- struct list_head *list)
+/* Rule removal helper.
+ * Caller must hold audit_filter_mutex. */
+static inline int audit_do_del_rule(struct audit_entry *entry,
+ struct list_head *list,
+ struct list_head *inotify_list)
{
struct audit_entry *e;
- LIST_HEAD(inotify_list);
- mutex_lock(&audit_filter_mutex);
list_for_each_entry(e, list, list) {
if (audit_compare_rule(&entry->rule, &e->rule))
continue;
- if (e->rule.watch) {
- audit_remove_watch(&e->rule, &inotify_list);
- /* match initial get for tmp watch */
- audit_put_watch(entry->rule.watch);
- }
+ if (e->rule.watch)
+ audit_remove_watch(&e->rule, inotify_list);
list_del_rcu(&e->list);
call_rcu(&e->rcu, audit_free_rule_rcu);
+
+ return 0;
+ }
+ return -ENOENT; /* No matching rule */
+}
+
+/* Remove an existing rule from filterlist. */
+static inline int audit_del_rule(struct audit_entry *entry,
+ struct list_head *list)
+{
+ int h, ret;
+ struct audit_field *inode_f = entry->rule.inode_f;
+ struct audit_watch *watch = entry->rule.watch;
+ LIST_HEAD(inotify_list);
+
+ if (watch) {
+ mutex_lock(&audit_filter_mutex);
+ /* we don't know the inode number, so must walk entire hash */
+ for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
+ list = &audit_inode_hash[h];
+ ret = audit_do_del_rule(entry, list, &inotify_list);
+ if (!ret)
+ break;
+ }
mutex_unlock(&audit_filter_mutex);
if (!list_empty(&inotify_list))
audit_inotify_unregister(&inotify_list);
- return 0;
+ /* match initial get for tmp watch */
+ audit_put_watch(watch);
+
+ } else {
+ if (inode_f) {
+ h = audit_hash_ino(inode_f->val);
+ list = &audit_inode_hash[h];
+ }
+ mutex_lock(&audit_filter_mutex);
+ ret = audit_do_del_rule(entry, list, &inotify_list);
+ mutex_unlock(&audit_filter_mutex);
}
- mutex_unlock(&audit_filter_mutex);
- /* match initial get for tmp watch */
- if (entry->rule.watch)
- audit_put_watch(entry->rule.watch);
- return -ENOENT; /* No matching rule */
+
+ return ret;
}
/* List rules using struct audit_rule. Exists for backward
@@ -1089,6 +1157,20 @@ static void audit_list(int pid, int seq,
kfree(rule);
}
}
+ for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
+ list_for_each_entry(entry, &audit_inode_hash[i], list) {
+ struct audit_rule *rule;
+
+ rule = audit_krule_to_rule(&entry->rule);
+ if (unlikely(!rule))
+ break;
+ skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
+ rule, sizeof(*rule));
+ if (skb)
+ skb_queue_tail(q, skb);
+ kfree(rule);
+ }
+ }
skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
if (skb)
skb_queue_tail(q, skb);
@@ -1117,6 +1199,20 @@ static void audit_list_rules(int pid, in
kfree(data);
}
}
+ for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
+ list_for_each_entry(e, &audit_inode_hash[i], list) {
+ struct audit_rule_data *data;
+
+ data = audit_krule_to_data(&e->rule);
+ if (unlikely(!data))
+ break;
+ skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
+ data, sizeof(*data) + data->buflen);
+ if (skb)
+ skb_queue_tail(q, skb);
+ kfree(data);
+ }
+ }
skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0);
if (skb)
skb_queue_tail(q, skb);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 43512c1..ea7bafc 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -169,32 +169,12 @@ #endif
};
/* Determine if any context name data matches a rule's watch data */
-static inline int audit_match_watch(struct audit_context *ctx,
- struct audit_watch *watch)
-{
- int i;
-
- if (!ctx)
- return 0;
-
- if (watch->ino == (unsigned long)-1)
- return 0;
-
- for (i = 0; i < ctx->name_count; i++) {
- if (ctx->names[i].dev == watch->dev &&
- (ctx->names[i].ino == watch->ino ||
- ctx->names[i].pino == watch->ino))
- return 1;
- }
-
- return 0;
-}
-
/* Compare a task_struct with an audit_rule. Return 1 on match, 0
* otherwise. */
static int audit_filter_rules(struct task_struct *tsk,
struct audit_krule *rule,
struct audit_context *ctx,
+ struct audit_names *name,
enum audit_state *state)
{
int i, j, need_sid = 1;
@@ -253,7 +233,10 @@ static int audit_filter_rules(struct tas
}
break;
case AUDIT_DEVMAJOR:
- if (ctx) {
+ if (name)
+ result = audit_comparator(MAJOR(name->dev),
+ f->op, f->val);
+ else if (ctx) {
for (j = 0; j < ctx->name_count; j++) {
if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) {
++result;
@@ -263,7 +246,10 @@ static int audit_filter_rules(struct tas
}
break;
case AUDIT_DEVMINOR:
- if (ctx) {
+ if (name)
+ result = audit_comparator(MINOR(name->dev),
+ f->op, f->val);
+ else if (ctx) {
for (j = 0; j < ctx->name_count; j++) {
if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
++result;
@@ -273,7 +259,10 @@ static int audit_filter_rules(struct tas
}
break;
case AUDIT_INODE:
- if (ctx) {
+ if (name)
+ result = (name->ino == f->val ||
+ name->pino == f->val);
+ else if (ctx) {
for (j = 0; j < ctx->name_count; j++) {
if (audit_comparator(ctx->names[j].ino, f->op, f->val) ||
audit_comparator(ctx->names[j].pino, f->op, f->val)) {
@@ -284,7 +273,10 @@ static int audit_filter_rules(struct tas
}
break;
case AUDIT_WATCH:
- result = audit_match_watch(ctx, rule->watch);
+ if (name && rule->watch->ino != (unsigned long)-1)
+ result = (name->dev == rule->watch->dev &&
+ (name->ino == rule->watch->ino ||
+ name->pino == rule->watch->ino));
break;
case AUDIT_LOGINUID:
result = 0;
@@ -343,7 +335,7 @@ static enum audit_state audit_filter_tas
rcu_read_lock();
list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
- if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) {
rcu_read_unlock();
return state;
}
@@ -373,8 +365,47 @@ static enum audit_state audit_filter_sys
int bit = AUDIT_BIT(ctx->major);
list_for_each_entry_rcu(e, list, list) {
- if ((e->rule.mask[word] & bit) == bit
- && audit_filter_rules(tsk, &e->rule, ctx, &state)) {
+ if ((e->rule.mask[word] & bit) == bit &&
+ audit_filter_rules(tsk, &e->rule, ctx, NULL,
+ &state)) {
+ rcu_read_unlock();
+ return state;
+ }
+ }
+ }
+ rcu_read_unlock();
+ return AUDIT_BUILD_CONTEXT;
+}
+
+/* At syscall exit time, this filter is called if any audit_names[] have been
+ * collected during syscall processing. We only check rules in sublists at hash
+ * buckets applicable to the inode numbers in audit_names[].
+ * Regarding audit_state, same rules apply as for audit_filter_syscall().
+ */
+static enum audit_state audit_filter_inodes(struct task_struct *tsk,
+ struct audit_context *ctx)
+{
+ int i;
+ struct audit_entry *e;
+ enum audit_state state;
+
+ if (audit_pid && tsk->tgid == audit_pid)
+ return AUDIT_DISABLED;
+
+ rcu_read_lock();
+ for (i = 0; i < ctx->name_count; i++) {
+ int word = AUDIT_WORD(ctx->major);
+ int bit = AUDIT_BIT(ctx->major);
+ struct audit_names *n = &ctx->names[i];
+ int h = audit_hash_ino((u32)n->ino);
+ struct list_head *list = &audit_inode_hash[h];
+
+ if (list_empty(list))
+ continue;
+
+ list_for_each_entry_rcu(e, list, list) {
+ if ((e->rule.mask[word] & bit) == bit &&
+ audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
rcu_read_unlock();
return state;
}
@@ -397,11 +428,20 @@ static inline struct audit_context *audi
if (context->in_syscall && !context->auditable) {
enum audit_state state;
+
state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
+ if (state == AUDIT_RECORD_CONTEXT) {
+ context->auditable = 1;
+ goto get_context;
+ }
+
+ state = audit_filter_inodes(tsk, context);
if (state == AUDIT_RECORD_CONTEXT)
context->auditable = 1;
+
}
+get_context:
context->pid = tsk->pid;
context->uid = tsk->uid;
context->gid = tsk->gid;