On Tue, 2005-05-17 at 18:04 +0100, David Woodhouse wrote:
> I'm really not fond of the refcount trick -- I suspect I'd be
> happier if
> we were just to try to keep track of sk_rmem_alloc so we never hit the
> condition in netlink_attachskb() which might cause it to fail.
Or even better, use a kernel thread and set an infinite timeout so it'll
never fail...
--- linux-2.6.9/kernel/audit.c~ 2005-05-18 13:54:03.000000000 +0100
+++ linux-2.6.9/kernel/audit.c 2005-05-18 17:40:17.000000000 +0100
@@ -46,6 +46,8 @@
#include <asm/types.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
#include <linux/audit.h>
@@ -77,7 +79,6 @@ static int audit_rate_limit;
/* Number of outstanding audit_buffers allowed. */
static int audit_backlog_limit = 64;
-static atomic_t audit_backlog = ATOMIC_INIT(0);
/* The identity of the user shutting down the audit system. */
uid_t audit_sig_uid = -1;
@@ -95,19 +96,17 @@ static atomic_t audit_lost = ATOMIC_I
/* The netlink socket. */
static struct sock *audit_sock;
-/* There are two lists of audit buffers. The txlist contains audit
- * buffers that cannot be sent immediately to the netlink device because
- * we are in an irq context (these are sent later in a tasklet).
- *
- * The second list is a list of pre-allocated audit buffers (if more
+/* The audit_freelist is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
* being placed on the freelist). */
-static spinlock_t audit_txlist_lock = SPIN_LOCK_UNLOCKED;
static spinlock_t audit_freelist_lock = SPIN_LOCK_UNLOCKED;
static int audit_freelist_count = 0;
-static LIST_HEAD(audit_txlist);
static LIST_HEAD(audit_freelist);
+static struct sk_buff_head audit_skb_queue;
+static struct task_struct *kauditd_task;
+static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
/* There are three lists of rules -- one to search at task creation
* time, one to search at syscall entry time, and another to search at
* syscall exit time. */
@@ -141,11 +140,10 @@ static DECLARE_MUTEX(audit_netlink_sem);
* use simultaneously. */
struct audit_buffer {
struct list_head list;
- struct sk_buff *skb; /* formatted skb ready to send */
struct audit_context *ctx; /* NULL or associated context */
int len; /* used area of tmp */
int size; /* size of tmp */
- char *tmp;
+ char *tmp; /* Always NUL-terminated */
int type;
int pid;
};
@@ -225,10 +223,8 @@ void audit_log_lost(const char *message)
if (print) {
printk(KERN_WARNING
- "audit: audit_lost=%d audit_backlog=%d"
- " audit_rate_limit=%d audit_backlog_limit=%d\n",
+ "audit: audit_lost=%d audit_rate_limit=%d audit_backlog_limit=%d\n",
atomic_read(&audit_lost),
- atomic_read(&audit_backlog),
audit_rate_limit,
audit_backlog_limit);
audit_panic(message);
@@ -283,6 +279,64 @@ int audit_set_failure(int state, uid_t l
}
#ifdef CONFIG_NET
+int kauditd_thread(void *dummy)
+{
+ struct sk_buff *skb;
+
+ while (1) {
+ skb = skb_dequeue(&audit_skb_queue);
+ if (skb) {
+ int err;
+#if 1 /* Actually can probably use the else version now but it's late... */
+ struct sock *rsk;
+ retry:
+ rsk = NULL;
+ if (audit_pid) {
+ rsk = netlink_getsockbypid(audit_sock, audit_pid);
+ if (IS_ERR(rsk)) {
+ /* It has to be -ECONNREFUSED. Auditd went away */
+ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+ audit_pid = 0;
+ }
+ }
+
+ if (!audit_pid) {
+ printk(KERN_ERR "%s\n", skb->data + NLMSG_SPACE(0));
+ dev_kfree_skb(skb);
+ continue;
+ }
+
+ err = netlink_attachskb(rsk, skb, 0, MAX_SCHEDULE_TIMEOUT);
+ if (err == 1)
+ goto retry;
+
+ BUG_ON(err); /* Cannot happen */
+
+ netlink_sendskb(rsk, skb, audit_sock->sk_protocol);
+#else
+ if (audit_pid) {
+ err = netlink_unicast(audit_sock, skb, audit_pid, 0);
+ if (err < 0) {
+ BUG_ON(err != -ECONNREFUSED);
+ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+ audit_pid = 0;
+ }
+ }
+#endif
+ } else {
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kauditd_wait, &wait);
+
+ if (!skb_queue_len(&audit_skb_queue))
+ schedule();
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kauditd_wait, &wait);
+ }
+ }
+}
+
void audit_send_reply(int pid, int seq, int type, int done, int multi,
void *payload, int size)
{
@@ -295,13 +349,16 @@ void audit_send_reply(int pid, int seq,
skb = alloc_skb(len, GFP_KERNEL);
if (!skb)
- goto nlmsg_failure;
+ return;
- nlh = NLMSG_PUT(skb, pid, seq, t, len - sizeof(*nlh));
+ nlh = NLMSG_PUT(skb, pid, seq, t, size);
nlh->nlmsg_flags = flags;
data = NLMSG_DATA(nlh);
memcpy(data, payload, size);
- netlink_unicast(audit_sock, skb, pid, MSG_DONTWAIT);
+
+ /* Ignore failure. It'll only happen if the sender goes away,
+ because our timeout is set to infinite. */
+ netlink_unicast(audit_sock, skb, pid, 0);
return;
nlmsg_failure: /* Used by NLMSG_PUT */
@@ -356,6 +413,15 @@ static int audit_receive_msg(struct sk_b
if (err)
return err;
+ /* As soon as there's any sign of userspace auditd, start kauditd to talk to it */
+ if (!kauditd_task)
+ kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+ if (IS_ERR(kauditd_task)) {
+ err = PTR_ERR(kauditd_task);
+ kauditd_task = NULL;
+ return err;
+ }
+
pid = NETLINK_CREDS(skb)->pid;
uid = NETLINK_CREDS(skb)->uid;
loginuid = NETLINK_CB(skb).loginuid;
@@ -370,7 +436,7 @@ static int audit_receive_msg(struct sk_b
status_set.rate_limit = audit_rate_limit;
status_set.backlog_limit = audit_backlog_limit;
status_set.lost = atomic_read(&audit_lost);
- status_set.backlog = atomic_read(&audit_backlog);
+ status_set.backlog = skb_queue_len(&audit_skb_queue);
audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
&status_set, sizeof(status_set));
break;
@@ -490,30 +556,35 @@ static void audit_receive(struct sock *s
up(&audit_netlink_sem);
}
-/* Move data from tmp buffer into an skb. This is an extra copy, and
- * that is unfortunate. However, the copy will only occur when a record
- * is being written to user space, which is already a high-overhead
- * operation. (Elimination of the copy is possible, for example, by
- * writing directly into a pre-allocated skb, at the cost of wasting
- * memory. */
-static void audit_log_move(struct audit_buffer *ab)
+/* Move data from tmp buffer into an skb. This is an extra copy, but
+ * there's no point in trying to log directly into an skb because
+ * netlink_trim() would only reallocate and copy it anyway. So we use
+ * the temporary buffer, then allocate optimally-sized skbs for netlink
+ * and check against the receiving socket's sk_rmem_alloc to ensure
+ * that we don't ever call netlink_unicast() if it would fail. */
+static void audit_log_move(struct audit_buffer *ab, int gfp_mask)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
- char *start;
- int len = NLMSG_SPACE(0) + ab->len + 1;
-
- /* possible resubmission */
- if (ab->skb)
- return;
+ char *start;
+ int len = NLMSG_SPACE(0) + ab->len + 1;
- skb = alloc_skb(len, GFP_ATOMIC);
+ if (!audit_pid) {
+ skb = NULL;
+ } else if (skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
+ if (audit_rate_check())
+ printk(KERN_WARNING "audit: audit_backlog_limit %d reached\n",
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ skb = NULL;
+ } else {
+ skb = alloc_skb(len, gfp_mask);
+ if (!skb)
+ audit_log_lost("out of memory in audit_log_move");
+ }
if (!skb) {
- /* Lose information in ab->tmp */
- audit_log_lost("out of memory in audit_log_move");
+ printk(KERN_ERR "%s\n", ab->tmp);
return;
}
- ab->skb = skb;
nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_SPACE(0));
nlh->nlmsg_type = ab->type;
nlh->nlmsg_len = ab->len;
@@ -522,47 +593,13 @@ static void audit_log_move(struct audit_
nlh->nlmsg_seq = 0;
start = skb_put(skb, ab->len);
memcpy(start, ab->tmp, ab->len);
-}
+ start[ab->len]=0;
-/* Iterate over the skbuff in the audit_buffer, sending their contents
- * to user space. */
-static inline int audit_log_drain(struct audit_buffer *ab)
-{
- struct sk_buff *skb = ab->skb;
-
- if (skb) {
- int retval = 0;
-
- if (audit_pid) {
- skb_get(skb); /* because netlink_* frees */
- retval = netlink_unicast(audit_sock, skb, audit_pid,
- MSG_DONTWAIT);
- }
- if (retval == -EAGAIN &&
- (atomic_read(&audit_backlog)) < audit_backlog_limit) {
- audit_log_end_irq(ab);
- return 1;
- }
- if (retval < 0) {
- if (retval == -ECONNREFUSED) {
- printk(KERN_ERR
- "audit: *NO* daemon at audit_pid=%d\n",
- audit_pid);
- audit_pid = 0;
- } else
- audit_log_lost("netlink socket too busy");
- }
- if (!audit_pid) { /* No daemon */
- int offset = NLMSG_SPACE(0);
- int len = skb->len - offset;
- skb->data[offset + len] = '\0';
- printk(KERN_ERR "%s\n", skb->data + offset);
- }
- kfree_skb(skb);
- }
- return 0;
+ skb_queue_tail(&audit_skb_queue, skb);
+ wake_up_interruptible(&kauditd_wait);
}
+
/* Initialize audit support at boot time. */
int __init audit_init(void)
{
@@ -572,7 +609,9 @@ int __init audit_init(void)
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
+ audit_sock->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
audit_initialized = 1;
+ skb_queue_head_init(&audit_skb_queue);
audit_filesystem_init();
audit_enabled = audit_default;
audit_log(NULL, AUDIT_KERNEL, "initialized");
@@ -582,15 +621,9 @@ int __init audit_init(void)
#else
/* Without CONFIG_NET, we have no skbuffs. For now, print what we have
* in the buffer. */
-static void audit_log_move(struct audit_buffer *ab)
-{
- printk(KERN_ERR "%*.*s\n", ab->len, ab->len, ab->tmp);
- ab->len = 0;
-}
-
-static inline int audit_log_drain(struct audit_buffer *ab)
+static void audit_log_move(struct audit_buffer *ab, int gfp_mask)
{
- return 0;
+ printk(KERN_ERR "%s\n", ab->tmp);
}
/* Initialize audit support at boot time. */
@@ -632,7 +665,7 @@ static void audit_buffer_free(struct aud
return;
kfree(ab->tmp);
- atomic_dec(&audit_backlog);
+
spin_lock_irqsave(&audit_freelist_lock, flags);
if (++audit_freelist_count > AUDIT_MAXFREE)
kfree(ab);
@@ -661,13 +694,11 @@ static struct audit_buffer * audit_buffe
if (!ab)
goto err;
}
- atomic_inc(&audit_backlog);
ab->tmp = kmalloc(AUDIT_BUFSIZ, gfp_mask);
if (!ab->tmp)
goto err;
- ab->skb = NULL;
ab->ctx = ctx;
ab->len = 0;
ab->size = AUDIT_BUFSIZ;
@@ -694,18 +725,6 @@ struct audit_buffer *audit_log_start(str
if (!audit_initialized)
return NULL;
- if (audit_backlog_limit
- && atomic_read(&audit_backlog) > audit_backlog_limit) {
- if (audit_rate_check())
- printk(KERN_WARNING
- "audit: audit_backlog=%d > "
- "audit_backlog_limit=%d\n",
- atomic_read(&audit_backlog),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- return NULL;
- }
-
ab = audit_buffer_alloc(ctx, GFP_ATOMIC, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
@@ -868,41 +887,19 @@ void audit_log_d_path(struct audit_buffe
kfree(path);
}
-/* Remove queued messages from the audit_txlist and send them to user space. */
-static void audit_tasklet_handler(unsigned long arg)
-{
- LIST_HEAD(list);
- struct audit_buffer *ab;
- unsigned long flags;
-
- spin_lock_irqsave(&audit_txlist_lock, flags);
- list_splice_init(&audit_txlist, &list);
- spin_unlock_irqrestore(&audit_txlist_lock, flags);
-
- while (!list_empty(&list)) {
- ab = list_entry(list.next, struct audit_buffer, list);
- list_del(&ab->list);
- audit_log_end_fast(ab);
- }
-}
-
-static DECLARE_TASKLET(audit_tasklet, audit_tasklet_handler, 0);
-
/* The netlink_* functions cannot be called inside an irq context, so
- * the audit buffer is places on a queue and a tasklet is scheduled to
- * remove them from the queue outside the irq context. May be called in
- * any context. */
+ * the skb is placed on a queue and the kernel thread is woken to handle
+ * actually sending it. */
void audit_log_end_irq(struct audit_buffer *ab)
{
- unsigned long flags;
-
if (!ab)
return;
- spin_lock_irqsave(&audit_txlist_lock, flags);
- list_add_tail(&ab->list, &audit_txlist);
- spin_unlock_irqrestore(&audit_txlist_lock, flags);
-
- tasklet_schedule(&audit_tasklet);
+ if (!audit_rate_check()) {
+ audit_log_lost("rate limit exceeded");
+ } else {
+ audit_log_move(ab, GFP_ATOMIC);
+ }
+ audit_buffer_free(ab);
}
/* Send the message in the audit buffer directly to user space. May not
@@ -915,9 +912,7 @@ void audit_log_end_fast(struct audit_buf
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- audit_log_move(ab);
- if (audit_log_drain(ab))
- return;
+ audit_log_move(ab, GFP_KERNEL);
}
audit_buffer_free(ab);
}
@@ -927,10 +922,8 @@ void audit_log_end_fast(struct audit_buf
* context.) */
void audit_log_end(struct audit_buffer *ab)
{
- if (in_irq())
- audit_log_end_irq(ab);
- else
- audit_log_end_fast(ab);
+ /* In a non-preemptible kernel, we have no way of knowing if a spinlock is held. */
+ audit_log_end_irq(ab);
}
/* Log an audit record. This is a convenience function that calls
--
dwmw2