On Wed, 2005-06-29 at 16:07 -0400, Steve Grubb wrote:
So, my guess is that there is something in the draining of the backlog that
causes this problem. The audit logs from .62 show that recvfrom() for auditd
is being audited over and over.
I also wonder if this is related to the problem Debbie & Denise were
reporting? It seems similar - system hung, no visible indicators that it hit
the backlog draining.
I've cleaned up the backlog waiting, and made sure that it will _stop_
waiting as soon as it has called audit_panic() once. That'll avoid the illusion
of a hung system if auditd isn't making progress. We should probably
allow auditctl to set both the audit_backlog_wait_time and
audit_backlog_wait_overflow variables, but you weren't about on IRC to
discuss the necessary changes to the AUDIT_SET layout, so this'll do for
now.
--- linux-2.6.9/kernel/audit.c~ 2005-06-24 15:13:54.000000000 +0100
+++ linux-2.6.9/kernel/audit.c 2005-06-30 17:45:22.000000000 +0100
@@ -79,6 +79,8 @@ static int audit_rate_limit;
/* Number of outstanding audit_buffers allowed. */
static int audit_backlog_limit = 64;
+static int audit_backlog_wait_time = 60 * HZ;
+static int audit_backlog_wait_overflow = 0;
/* The identity of the user shutting down the audit system. */
uid_t audit_sig_uid = -1;
@@ -723,6 +725,7 @@ struct audit_buffer *audit_log_start(str
struct timespec t;
unsigned int serial;
int reserve;
+ unsigned long timeout_start = jiffies;
if (!audit_initialized)
return NULL;
@@ -735,8 +738,9 @@ struct audit_buffer *audit_log_start(str
while (audit_backlog_limit
&& skb_queue_len(&audit_skb_queue) > audit_backlog_limit +
reserve) {
- if (gfp_mask & __GFP_WAIT) {
- int ret = 1;
+ if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time
+ && time_before(jiffies, timeout_start + audit_backlog_wait_time)) {
+
/* Wait for auditd to drain the queue a little */
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
@@ -744,12 +748,11 @@ struct audit_buffer *audit_log_start(str
if (audit_backlog_limit &&
skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
- ret = schedule_timeout(HZ * 60);
+ schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&audit_backlog_wait, &wait);
- if (ret)
- continue;
+ continue;
}
if (audit_rate_check())
printk(KERN_WARNING
@@ -758,6 +761,8 @@ struct audit_buffer *audit_log_start(str
skb_queue_len(&audit_skb_queue),
audit_backlog_limit);
audit_log_lost("backlog limit exceeded");
+ audit_backlog_wait_time = audit_backlog_wait_overflow;
+ wake_up(&audit_backlog_wait);
return NULL;
}
--
dwmw2