On Tue, 2005-05-17 at 18:04 +0100, David Woodhouse wrote:
> I'm really not fond of the refcount trick -- I suspect I'd be
> happier if
> we were just to try to keep track of sk_rmem_alloc so we never hit the
> condition in netlink_attachskb() which might cause it to fail.
Or even better, use a kernel thread and set an infinite timeout so it'll
never fail...
--- linux-2.6.9/kernel/audit.c~ 2005-05-18 13:54:03.000000000 +0100
+++ linux-2.6.9/kernel/audit.c 2005-05-18 17:40:17.000000000 +0100
@@ -46,6 +46,8 @@
#include <asm/types.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
#include <linux/audit.h>
@@ -77,7 +79,6 @@ static int audit_rate_limit;
/* Number of outstanding audit_buffers allowed. */
static int audit_backlog_limit = 64;
-static atomic_t audit_backlog = ATOMIC_INIT(0);
/* The identity of the user shutting down the audit system. */
uid_t audit_sig_uid = -1;
@@ -95,19 +96,17 @@ static atomic_t audit_lost = ATOMIC_I
/* The netlink socket. */
static struct sock *audit_sock;
-/* There are two lists of audit buffers. The txlist contains audit
- * buffers that cannot be sent immediately to the netlink device because
- * we are in an irq context (these are sent later in a tasklet).
- *
- * The second list is a list of pre-allocated audit buffers (if more
+/* The audit_freelist is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
* being placed on the freelist). */
-static spinlock_t audit_txlist_lock = SPIN_LOCK_UNLOCKED;
static spinlock_t audit_freelist_lock = SPIN_LOCK_UNLOCKED;
static int audit_freelist_count = 0;
-static LIST_HEAD(audit_txlist);
static LIST_HEAD(audit_freelist);
+static struct sk_buff_head audit_skb_queue;
+static struct task_struct *kauditd_task;
+static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
/* There are three lists of rules -- one to search at task creation
* time, one to search at syscall entry time, and another to search at
* syscall exit time. */
@@ -141,11 +140,10 @@ static DECLARE_MUTEX(audit_netlink_sem);
* use simultaneously. */
struct audit_buffer {
struct list_head list;
- struct sk_buff *skb; /* formatted skb ready to send */
struct audit_context *ctx; /* NULL or associated context */
int len; /* used area of tmp */
int size; /* size of tmp */
- char *tmp;
+ char *tmp; /* Always NUL-terminated */
int type;
int pid;
};
@@ -225,10 +223,8 @@ void audit_log_lost(const char *message)
if (print) {
printk(KERN_WARNING
- "audit: audit_lost=%d audit_backlog=%d"
- " audit_rate_limit=%d audit_backlog_limit=%d\n",
+ "audit: audit_lost=%d audit_rate_limit=%d audit_backlog_limit=%d\n",
atomic_read(&audit_lost),
- atomic_read(&audit_backlog),
audit_rate_limit,
audit_backlog_limit);
audit_panic(message);
@@ -283,6 +279,64 @@ int audit_set_failure(int state, uid_t l
}
#ifdef CONFIG_NET
+int kauditd_thread(void *dummy)
+{
+ struct sk_buff *skb;
+
+ while (1) {
+ skb = skb_dequeue(&audit_skb_queue);
+ if (skb) {
+ int err;
+#if 1 /* Actually can probably use the else version now but it's late... */
+ struct sock *rsk;
+ retry:
+ rsk = NULL;
+ if (audit_pid) {
+ rsk = netlink_getsockbypid(audit_sock, audit_pid);
+ if (IS_ERR(rsk)) {
+ /* It has to be -ECONNREFUSED. Auditd went away */
+ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+ audit_pid = 0;
+ }
+ }
+
+ if (!audit_pid) {
+ printk(KERN_ERR "%s\n", skb->data + NLMSG_SPACE(0));
+ dev_kfree_skb(skb);
+ continue;
+ }
+
+ err = netlink_attachskb(rsk, skb, 0, MAX_SCHEDULE_TIMEOUT);
+ if (err == 1)
+ goto retry;
+
+ BUG_ON(err); /* Cannot happen */
+
+ netlink_sendskb(rsk, skb, audit_sock->sk_protocol);
+#else
+ if (audit_pid) {
+ err = netlink_unicast(audit_sock, skb, audit_pid, 0);
+ if (err < 0) {
+ BUG_ON(err != -ECONNREFUSED);
+ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+ audit_pid = 0;
+ }
+ }
+#endif
+ } else {
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kauditd_wait, &wait);
+
+ if (!skb_queue_len(&audit_skb_queue))
+ schedule();
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kauditd_wait, &wait);
+ }
+ }
+}
+
void audit_send_reply(int pid, int seq, int type, int done, int multi,
void *payload, int size)
{
@@ -295,13 +349,16 @@ void audit_send_reply(int pid, int seq,
skb = alloc_skb(len, GFP_KERNEL);
if (!skb)
- goto nlmsg_failure;
+ return;
- nlh = NLMSG_PUT(skb, pid, seq, t, len - sizeof(*nlh));
+ nlh = NLMSG_PUT(skb, pid, seq, t, size);
nlh->nlmsg_flags = flags;
data = NLMSG_DATA(nlh);
memcpy(data, payload, size);
- netlink_unicast(audit_sock, skb, pid, MSG_DONTWAIT);
+
+ /* Ignore failure. It'll only happen if the sender goes away,
+ because our timeout is set to infinite. */
+ netlink_unicast(audit_sock, skb, pid, 0);
return;
nlmsg_failure: /* Used by NLMSG_PUT */
@@ -356,6 +413,15 @@ static int audit_receive_msg(struct sk_b
if (err)
return err;
+ /* As soon as there's any sign of userspace auditd, start kauditd to talk to it */
+ if (!kauditd_task)
+ kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+ if (IS_ERR(kauditd_task)) {
+ err = PTR_ERR(kauditd_task);
+ kauditd_task = NULL;
+ return err;
+ }
+
pid = NETLINK_CREDS(skb)->pid;
uid = NETLINK_CREDS(skb)->uid;
loginuid = NETLINK_CB(skb).loginuid;
@@ -370,7 +436,7 @@ static int audit_receive_msg(struct sk_b
status_set.rate_limit = audit_rate_limit;
status_set.backlog_limit = audit_backlog_limit;
status_set.lost = atomic_read(&audit_lost);
- status_set.backlog = atomic_read(&audit_backlog);
+ status_set.backlog = skb_queue_len(&audit_skb_queue);
audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
&status_set, sizeof(status_set));
break;
@@ -490,30 +556,35 @@ static void audit_receive(struct sock *s
up(&audit_netlink_sem);
}
-/* Move data from tmp buffer into an skb. This is an extra copy, and
- * that is unfortunate. However, the copy will only occur when a record
- * is being written to user space, which is already a high-overhead
- * operation. (Elimination of the copy is possible, for example, by
- * writing directly into a pre-allocated skb, at the cost of wasting
- * memory. */
-static void audit_log_move(struct audit_buffer *ab)
+/* Move data from tmp buffer into an skb. This is an extra copy, but
+ * there's no point in trying to log directly into an skb because
+ * netlink_trim() would only reallocate and copy it anyway. So we use
+ * the temporary buffer, then allocate optimally-sized skbs for netlink
+ * and check against the receiving socket's sk_rmem_alloc to ensure
+ * that we don't ever call netlink_unicast() if it would fail. */
+static void audit_log_move(struct audit_buffer *ab, int gfp_mask)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
- char *start;
- int len = NLMSG_SPACE(0) + ab->len + 1;
-
- /* possible resubmission */
- if (ab->skb)
- return;
+ char *start;
+ int len = NLMSG_SPACE(0) + ab->len + 1;
- skb = alloc_skb(len, GFP_ATOMIC);
+ if (!audit_pid) {
+ skb = NULL;
+ } else if (skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
+ if (audit_rate_check())
+ printk(KERN_WARNING "audit: audit_backlog_limit %d reached\n",
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ skb = NULL;
+ } else {
+ skb = alloc_skb(len, gfp_mask);
+ if (!skb)
+ audit_log_lost("out of memory in audit_log_move");
+ }
if (!skb) {
- /* Lose information in ab->tmp */
- audit_log_lost("out of memory in audit_log_move");
+ printk(KERN_ERR "%s\n", ab->tmp);
return;
}
- ab->skb = skb;
nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(0));
nlh->nlmsg_type = ab->type;
nlh->nlmsg_len = ab->len;
@@ -522,47 +593,13 @@ static void audit_log_move(struct audit_
nlh->nlmsg_seq = 0;
start = skb_put(skb, ab->len);
memcpy(start, ab->tmp, ab->len);
-}
+ start[ab->len]=0;
-/* Iterate over the skbuff in the audit_buffer, sending their contents
- * to user space. */
-static inline int audit_log_drain(struct audit_buffer *ab)
-{
- struct sk_buff *skb = ab->skb;
-
- if (skb) {
- int retval = 0;
-
- if (audit_pid) {
- skb_get(skb); /* because netlink_* frees */
- retval = netlink_unicast(audit_sock, skb, audit_pid,
- MSG_DONTWAIT);
- }
- if (retval == -EAGAIN &&
- (atomic_read(&audit_backlog)) < audit_backlog_limit) {
- audit_log_end_irq(ab);
- return 1;
- }
- if (retval < 0) {
- if (retval == -ECONNREFUSED) {
- printk(KERN_ERR
- "audit: *NO* daemon at audit_pid=%d\n",
- audit_pid);
- audit_pid = 0;
- } else
- audit_log_lost("netlink socket too busy");
- }
- if (!audit_pid) { /* No daemon */
- int offset = NLMSG_SPACE(0);
- int len = skb->len - offset;
- skb->data[offset + len] = '\0';
- printk(KERN_ERR "%s\n", skb->data + offset);
- }
- kfree_skb(skb);
- }
- return 0;
+ skb_queue_tail(&audit_skb_queue, skb);
+ wake_up_interruptible(&kauditd_wait);
}
+
/* Initialize audit support at boot time. */
int __init audit_init(void)
{
@@ -572,7 +609,9 @@ int __init audit_init(void)
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
+ audit_sock->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
audit_initialized = 1;
+ skb_queue_head_init(&audit_skb_queue);
audit_filesystem_init();
audit_enabled = audit_default;
audit_log(NULL, AUDIT_KERNEL, "initialized");
@@ -582,15 +621,9 @@ int __init audit_init(void)
#else
/* Without CONFIG_NET, we have no skbuffs. For now, print what we have
* in the buffer. */
-static void audit_log_move(struct audit_buffer *ab)
-{
- printk(KERN_ERR "%*.*s\n", ab->len, ab->len, ab->tmp);
- ab->len = 0;
-}
-
-static inline int audit_log_drain(struct audit_buffer *ab)
+static void audit_log_move(struct audit_buffer *ab, int gfp_mask)
{
- return 0;
+ printk(KERN_ERR "%s\n", ab->tmp);
}
/* Initialize audit support at boot time. */
@@ -632,7 +665,7 @@ static void audit_buffer_free(struct aud
return;
kfree(ab->tmp);
- atomic_dec(&audit_backlog);
+
spin_lock_irqsave(&audit_freelist_lock, flags);
if (++audit_freelist_count > AUDIT_MAXFREE)
kfree(ab);
@@ -661,13 +694,11 @@ static struct audit_buffer * audit_buffe
if (!ab)
goto err;
}
- atomic_inc(&audit_backlog);
ab->tmp = kmalloc(AUDIT_BUFSIZ, gfp_mask);
if (!ab->tmp)
goto err;
- ab->skb = NULL;
ab->ctx = ctx;
ab->len = 0;
ab->size = AUDIT_BUFSIZ;
@@ -694,18 +725,6 @@ struct audit_buffer *audit_log_start(str
if (!audit_initialized)
return NULL;
- if (audit_backlog_limit
- && atomic_read(&audit_backlog) > audit_backlog_limit) {
- if (audit_rate_check())
- printk(KERN_WARNING
- "audit: audit_backlog=%d > "
- "audit_backlog_limit=%d\n",
- atomic_read(&audit_backlog),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- return NULL;
- }
-
ab = audit_buffer_alloc(ctx, GFP_ATOMIC, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
@@ -868,41 +887,19 @@ void audit_log_d_path(struct audit_buffe
kfree(path);
}
-/* Remove queued messages from the audit_txlist and send them to user space. */
-static void audit_tasklet_handler(unsigned long arg)
-{
- LIST_HEAD(list);
- struct audit_buffer *ab;
- unsigned long flags;
-
- spin_lock_irqsave(&audit_txlist_lock, flags);
- list_splice_init(&audit_txlist, &list);
- spin_unlock_irqrestore(&audit_txlist_lock, flags);
-
- while (!list_empty(&list)) {
- ab = list_entry(list.next, struct audit_buffer, list);
- list_del(&ab->list);
- audit_log_end_fast(ab);
- }
-}
-
-static DECLARE_TASKLET(audit_tasklet, audit_tasklet_handler, 0);
-
/* The netlink_* functions cannot be called inside an irq context, so
- * the audit buffer is places on a queue and a tasklet is scheduled to
- * remove them from the queue outside the irq context. May be called in
- * any context. */
+ * the skb is placed on a queue and the kernel thread is woken to handle
+ * actually sending it. */
void audit_log_end_irq(struct audit_buffer *ab)
{
- unsigned long flags;
-
if (!ab)
return;
- spin_lock_irqsave(&audit_txlist_lock, flags);
- list_add_tail(&ab->list, &audit_txlist);
- spin_unlock_irqrestore(&audit_txlist_lock, flags);
-
- tasklet_schedule(&audit_tasklet);
+ if (!audit_rate_check()) {
+ audit_log_lost("rate limit exceeded");
+ } else {
+ audit_log_move(ab, GFP_ATOMIC);
+ }
+ audit_buffer_free(ab);
}
/* Send the message in the audit buffer directly to user space. May not
@@ -915,9 +912,7 @@ void audit_log_end_fast(struct audit_buf
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- audit_log_move(ab);
- if (audit_log_drain(ab))
- return;
+ audit_log_move(ab, GFP_KERNEL);
}
audit_buffer_free(ab);
}
@@ -927,10 +922,8 @@ void audit_log_end_fast(struct audit_buf
* context.) */
void audit_log_end(struct audit_buffer *ab)
{
- if (in_irq())
- audit_log_end_irq(ab);
- else
- audit_log_end_fast(ab);
+ /* In a non-preemptible kernel, we have no way of knowing if a spinlock is held. */
+ audit_log_end_irq(ab);
}
/* Log an audit record. This is a convenience function that calls
--
dwmw2