[PATCH ghak124 v3] audit: log nftables configuration change events
by Richard Guy Briggs
iptables, ip6tables, arptables and ebtables table registration,
replacement and unregistration configuration events are logged for the
native (legacy) iptables setsockopt api, but not for the
nftables netlink api which is used by the nft-variant of iptables in
addition to nftables itself.
Add calls to log the configuration actions in the nftables netlink api.
This uses the same NETFILTER_CFG record format but overloads the table
field.
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.878:162) : table=?:0;?:0 family=unspecified entries=2 op=nft_register_gen pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
...
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.878:162) : table=firewalld:1;?:0 family=inet entries=0 op=nft_register_table pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
...
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;filter_FORWARD:85 family=inet entries=8 op=nft_register_chain pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
...
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;filter_FORWARD:85 family=inet entries=101 op=nft_register_rule pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
...
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;__set0:87 family=inet entries=87 op=nft_register_setelem pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
...
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;__set0:87 family=inet entries=0 op=nft_register_set pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
For further information please see issue
https://github.com/linux-audit/audit-kernel/issues/124
Signed-off-by: Richard Guy Briggs <rgb(a)redhat.com>
---
Changelog:
v3:
- inline message type rather than table
v2:
- differentiate between xtables and nftables
- add set, setelem, obj, flowtable, gen
- use nentries field as appropriate per type
- overload the "tables" field with table handle and chain/set/flowtable
include/linux/audit.h | 18 ++++++++
kernel/auditsc.c | 24 ++++++++--
net/netfilter/nf_tables_api.c | 103 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 142 insertions(+), 3 deletions(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3fcd9ee49734..604ede630580 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <uapi/linux/audit.h>
+#include <uapi/linux/netfilter/nf_tables.h>
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
@@ -98,6 +99,23 @@ enum audit_nfcfgop {
AUDIT_XT_OP_REGISTER,
AUDIT_XT_OP_REPLACE,
AUDIT_XT_OP_UNREGISTER,
+ AUDIT_NFT_OP_TABLE_REGISTER,
+ AUDIT_NFT_OP_TABLE_UNREGISTER,
+ AUDIT_NFT_OP_CHAIN_REGISTER,
+ AUDIT_NFT_OP_CHAIN_UNREGISTER,
+ AUDIT_NFT_OP_RULE_REGISTER,
+ AUDIT_NFT_OP_RULE_UNREGISTER,
+ AUDIT_NFT_OP_SET_REGISTER,
+ AUDIT_NFT_OP_SET_UNREGISTER,
+ AUDIT_NFT_OP_SETELEM_REGISTER,
+ AUDIT_NFT_OP_SETELEM_UNREGISTER,
+ AUDIT_NFT_OP_GEN_REGISTER,
+ AUDIT_NFT_OP_OBJ_REGISTER,
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ AUDIT_NFT_OP_OBJ_RESET,
+ AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+ AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ AUDIT_NFT_OP_INVALID,
};
extern int is_audit_feature_set(int which);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 468a23390457..3a9100e95fda 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -75,6 +75,7 @@
#include <linux/uaccess.h>
#include <linux/fsnotify_backend.h>
#include <uapi/linux/limits.h>
+#include <uapi/linux/netfilter/nf_tables.h>
#include "audit.h"
@@ -136,9 +137,26 @@ struct audit_nfcfgop_tab {
};
static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
- { AUDIT_XT_OP_REGISTER, "register" },
- { AUDIT_XT_OP_REPLACE, "replace" },
- { AUDIT_XT_OP_UNREGISTER, "unregister" },
+ { AUDIT_XT_OP_REGISTER, "xt_register" },
+ { AUDIT_XT_OP_REPLACE, "xt_replace" },
+ { AUDIT_XT_OP_UNREGISTER, "xt_unregister" },
+ { AUDIT_NFT_OP_TABLE_REGISTER, "nft_register_table" },
+ { AUDIT_NFT_OP_TABLE_UNREGISTER, "nft_unregister_table" },
+ { AUDIT_NFT_OP_CHAIN_REGISTER, "nft_register_chain" },
+ { AUDIT_NFT_OP_CHAIN_UNREGISTER, "nft_unregister_chain" },
+ { AUDIT_NFT_OP_RULE_REGISTER, "nft_register_rule" },
+ { AUDIT_NFT_OP_RULE_UNREGISTER, "nft_unregister_rule" },
+ { AUDIT_NFT_OP_SET_REGISTER, "nft_register_set" },
+ { AUDIT_NFT_OP_SET_UNREGISTER, "nft_unregister_set" },
+ { AUDIT_NFT_OP_SETELEM_REGISTER, "nft_register_setelem" },
+ { AUDIT_NFT_OP_SETELEM_UNREGISTER, "nft_unregister_setelem" },
+ { AUDIT_NFT_OP_GEN_REGISTER, "nft_register_gen" },
+ { AUDIT_NFT_OP_OBJ_REGISTER, "nft_register_obj" },
+ { AUDIT_NFT_OP_OBJ_UNREGISTER, "nft_unregister_obj" },
+ { AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" },
+ { AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" },
+ { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" },
+ { AUDIT_NFT_OP_INVALID, "nft_invalid" },
};
static int audit_match_perm(struct audit_context *ctx, int mask)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3558e76e2733..b9e7440cc87d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -12,6 +12,7 @@
#include <linux/netlink.h>
#include <linux/vmalloc.h>
#include <linux/rhashtable.h>
+#include <linux/audit.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
@@ -693,6 +694,16 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
+ ctx->table->name, ctx->table->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ ctx->table->use,
+ event == NFT_MSG_NEWTABLE ?
+ AUDIT_NFT_OP_TABLE_REGISTER :
+ AUDIT_NFT_OP_TABLE_UNREGISTER);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -1428,6 +1439,17 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ ctx->chain->name, ctx->chain->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ ctx->chain->use,
+ event == NFT_MSG_NEWCHAIN ?
+ AUDIT_NFT_OP_CHAIN_REGISTER :
+ AUDIT_NFT_OP_CHAIN_UNREGISTER);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -2691,6 +2713,17 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ ctx->chain->name, ctx->chain->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ rule->handle,
+ event == NFT_MSG_NEWRULE ?
+ AUDIT_NFT_OP_RULE_REGISTER :
+ AUDIT_NFT_OP_RULE_UNREGISTER);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -3693,6 +3726,17 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
struct sk_buff *skb;
u32 portid = ctx->portid;
int err;
+ char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ set->name, set->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ set->field_count,
+ event == NFT_MSG_NEWSET ?
+ AUDIT_NFT_OP_SET_REGISTER :
+ AUDIT_NFT_OP_SET_UNREGISTER);
+ kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -4809,6 +4853,17 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
u32 portid = ctx->portid;
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ ctx->table->name, ctx->table->handle,
+ set->name, set->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ set->handle,
+ event == NFT_MSG_NEWSETELEM ?
+ AUDIT_NFT_OP_SETELEM_REGISTER :
+ AUDIT_NFT_OP_SETELEM_UNREGISTER);
+ kfree(buf);
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
@@ -5890,6 +5945,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
obj->ops->type->type != filter->type)
goto cont;
+ if (reset) {
+ char *buf = kasprintf(GFP_KERNEL,
+ "%s:%llu;?:0",
+ table->name,
+ table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ AUDIT_NFT_OP_OBJ_RESET);
+ kfree(buf);
+ }
+
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWOBJ,
@@ -6000,6 +6068,17 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
reset = true;
+ if (reset) {
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
+ table->name, table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ AUDIT_NFT_OP_OBJ_RESET);
+ kfree(buf);
+ }
+
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
family, table, obj, reset);
@@ -6075,6 +6154,16 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
+ table->name, table->handle);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER);
+ kfree(buf);
if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -6701,6 +6790,17 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
{
struct sk_buff *skb;
int err;
+ char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
+ flowtable->table->name, flowtable->table->handle,
+ flowtable->name, flowtable->handle);
+
+ audit_log_nfcfg(buf,
+ ctx->family,
+ flowtable->hooknum,
+ event == NFT_MSG_NEWFLOWTABLE ?
+ AUDIT_NFT_OP_FLOWTABLE_REGISTER :
+ AUDIT_NFT_OP_FLOWTABLE_UNREGISTER);
+ kfree(buf);
if (ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -6822,6 +6922,9 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
struct sk_buff *skb2;
int err;
+ audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
+ AUDIT_NFT_OP_GEN_REGISTER);
+
if (nlmsg_report(nlh) &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
--
1.8.3.1
3 years, 10 months
[PATCH v2] audit: report audit wait metric in audit status reply
by Max Englander
In environments where the preservation of audit events and predictable
usage of system memory are prioritized, admins may use a combination of
--backlog_wait_time and -b options at the risk of degraded performance
resulting from backlog waiting. In some cases, this risk may be
preferred to lost events or unbounded memory usage. Ideally, this risk
can be mitigated by making adjustments when backlog waiting is detected.
However, detection can be diffult using the currently available metrics.
For example, an admin attempting to debug degraded performance may
falsely believe a full backlog indicates backlog waiting. It may turn
out the backlog frequently fills up but drains quickly.
To make it easier to reliably track degraded performance to backlog
waiting, this patch makes the following changes:
Add a new field backlog_wait_sum to the audit status reply. Initialize
this field to zero. Add to this field the total time spent by the
current task on scheduled timeouts while the backlog limit is exceeded.
Tested on Ubuntu 18.04 using complementary changes to the audit
userspace: https://github.com/linux-audit/audit-userspace/pull/134.
Signed-off-by: Max Englander <max.englander(a)gmail.com>
---
Patch changelogs between v1 and v2:
- Instead of printing a warning when backlog waiting occurs, add
duration of backlog waiting to cumulative sum, and report this
sum in audit status reply.
include/uapi/linux/audit.h | 7 ++++++-
kernel/audit.c | 9 +++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index a534d71e689a..ea0cc364beca 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -340,6 +340,7 @@ enum {
#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010
#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020
#define AUDIT_STATUS_LOST 0x0040
+#define AUDIT_STATUS_BACKLOG_WAIT_SUM 0x0080
#define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001
#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002
@@ -348,6 +349,7 @@ enum {
#define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
#define AUDIT_FEATURE_BITMAP_LOST_RESET 0x00000020
#define AUDIT_FEATURE_BITMAP_FILTER_FS 0x00000040
+#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM 0x00000080
#define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
@@ -355,12 +357,14 @@ enum {
AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
AUDIT_FEATURE_BITMAP_LOST_RESET | \
- AUDIT_FEATURE_BITMAP_FILTER_FS)
+ AUDIT_FEATURE_BITMAP_FILTER_FS | \
+ AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM)
/* deprecated: AUDIT_VERSION_* */
#define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL
#define AUDIT_VERSION_BACKLOG_LIMIT AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT
#define AUDIT_VERSION_BACKLOG_WAIT_TIME AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME
+#define AUDIT_VERSION_BACKLOG_WAIT_SUM AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM
/* Failure-to-log actions */
#define AUDIT_FAIL_SILENT 0
@@ -466,6 +470,7 @@ struct audit_status {
__u32 feature_bitmap; /* bitmap of kernel audit features */
};
__u32 backlog_wait_time;/* message queue wait timeout */
+ __u32 backlog_wait_sum;/* time spent waiting while message limit exceeded */
};
struct audit_features {
diff --git a/kernel/audit.c b/kernel/audit.c
index 87f31bf1f0a0..301ea4f3d750 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -136,6 +136,11 @@ u32 audit_sig_sid = 0;
*/
static atomic_t audit_lost = ATOMIC_INIT(0);
+/* Monotonically increasing sum of time the kernel has spent
+ * waiting while the backlog limit is exceeded.
+ */
+static atomic_t audit_backlog_wait_sum = ATOMIC_INIT(0);
+
/* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -1204,6 +1209,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
s.backlog_wait_time = audit_backlog_wait_time;
+ s.backlog_wait_sum = atomic_read(&audit_backlog_wait_sum);
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
@@ -1794,6 +1800,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
return NULL;
}
}
+
+ if (stime != audit_backlog_wait_time)
+ atomic_add(audit_backlog_wait_time - stime, &audit_backlog_wait_sum);
}
ab = audit_buffer_alloc(ctx, gfp_mask, type);
--
2.17.1
4 years
Identifying thread/process termination
by Natan Yellin
Hello,
I've been tracking all process terminations using a rule for the exit and
exit_group syscalls. However, by looking at the audit events for exit it is
impossible to differentiate between the death of different threads in the
same thread group. Is there an alternative way to track this?
For my use case, I would like to know when either processes or individual
threads execute and terminate. (I'm fine tracking at either granularity.)
Right now I can track the creation properly using fork/clone/etc but for
termination I receive multiple exit events with identical information that
doesn't let me know which thread died.
Thanks,
Natan
4 years, 1 month
[PATCH 00/34] fs: idmapped mounts
by Christian Brauner
Hey everyone,
I vanished for a little while to focus on this work here so sorry for
not being available by mail for a while.
Since quite a long time we have issues with sharing mounts between
multiple unprivileged containers with different id mappings, sharing a
rootfs between multiple containers with different id mappings, and also
sharing regular directories and filesystems between users with different
uids and gids. The latter use-cases have become even more important with
the availability and adoption of systemd-homed (cf. [1]) to implement
portable home directories.
The solutions we have tried and proposed so far include the introduction
of fsid mappings, a tiny overlay based filesystem, and an approach to
call override creds in the vfs. None of these solutions have covered all
of the above use-cases.
The solution proposed here has it's origins in multiple discussions
during Linux Plumbers 2017 during and after the end of the containers
microconference.
To the best of my knowledge this involved Aleksa, Stéphane, Eric, David,
James, and myself. A variant of the solution proposed here has also been
discussed, again to the best of my knowledge, after a Linux conference
in St. Petersburg in Russia between Christoph, Tycho, and myself in 2017
after Linux Plumbers.
I've taken the time to finally implement a working version of this
solution over the last weeks to the best of my abilities. Tycho has
signed up for this sligthly crazy endeavour as well and he has helped
with the conversion of the xattr codepaths.
The core idea is to make idmappings a property of struct vfsmount
instead of tying it to a process being inside of a user namespace which
has been the case for all other proposed approaches.
It means that idmappings become a property of bind-mounts, i.e. each
bind-mount can have a separate idmapping. This has the obvious advantage
that idmapped mounts can be created inside of the initial user
namespace, i.e. on the host itself instead of requiring the caller to be
located inside of a user namespace. This enables such use-cases as e.g.
making a usb stick available in multiple locations with different
idmappings (see the vfat port that is part of this patch series).
The vfsmount struct gains a new struct user_namespace member. The
idmapping of the user namespace becomes the idmapping of the mount. A
caller that is either privileged with respect to the user namespace of
the superblock of the underlying filesystem or a caller that is
privileged with respect to the user namespace a mount has been idmapped
with can create a new bind-mount and mark it with a user namespace. The
user namespace the mount will be marked with can be specified by passing
a file descriptor refering to the user namespace as an argument to the
new mount_setattr() syscall together with the new MOUNT_ATTR_IDMAP flag.
By default vfsmounts are marked with the initial user namespace and no
behavioral or performance changes should be observed. All mapping
operations are nops for the initial user namespace.
When a file/inode is accessed through an idmapped mount the i_uid and
i_gid of the inode will be remapped according to the user namespace the
mount has been marked with. When a new object is created based on the
fsuid and fsgid of the caller they will similarly be remapped according
to the user namespace of the mount they care created from.
This means the user namespace of the mount needs to be passed down into
a few relevant inode_operations. This mostly includes inode operations
that create filesystem objects or change file attributes. Some of them
such as ->getattr() don't even need to change since they pass down a
struct path and thus the struct vfsmount is already available. Other
inode operations need to be adapted to pass down the user namespace the
vfsmount has been marked with. Al was nice enough to point out that he
will not tolerate struct vfsmount being passed to filesystems and that I
should pass down the user namespace directly; which is what I did.
The inode struct itself is never altered whenever the i_uid and i_gid
need to be mapped, i.e. i_uid and i_gid are only remapped at the time of
the check. An inode once initialized (during lookup or object creation)
is never altered when accessed through an idmapped mount.
To limit the amount of noise in this first iteration we have not changed
the existing inode operations but rather introduced a few new struct
inode operation methods such as ->mkdir_mapped which pass down the user
namespace of the mount they have been called from. Should this solution
be worth pursuing we have no problem adapting the existing inode
operations instead.
In order to support idmapped mounts, filesystems need to be changed and
mark themselves with the FS_ALLOW_IDMAP flag in fs_flags. In this first
iteration I tried to illustrate this by changing three different
filesystem with different levels of complexity. Of course with some bias
towards urgent use-cases and filesystems I was at least a little more
familiar with. However, Tycho and I (and others) have no problem
converting each filesystem one-by-one. This first iteration includes fat
(msdos and vfat), ext4, and overlayfs (both with idmapped lower and
upper directories and idmapped merged directories). I'm sure I haven't
gotten everything right for all three of them in the first version of
this patch.
I have written a simple tool that allows to create idmapped mounts so
people can play with this patch series. Here are a few illustrations:
1. Create a simple idmapped mount of another user's home directory
u1001@f2-vm:/$ sudo ./mount-idmapped --map-mount b:1000:1001:1 /home/ubuntu/ /mnt
u1001@f2-vm:/$ ls -al /home/ubuntu/
total 28
drwxr-xr-x 2 ubuntu ubuntu 4096 Oct 28 22:07 .
drwxr-xr-x 4 root root 4096 Oct 28 04:00 ..
-rw------- 1 ubuntu ubuntu 3154 Oct 28 22:12 .bash_history
-rw-r--r-- 1 ubuntu ubuntu 220 Feb 25 2020 .bash_logout
-rw-r--r-- 1 ubuntu ubuntu 3771 Feb 25 2020 .bashrc
-rw-r--r-- 1 ubuntu ubuntu 807 Feb 25 2020 .profile
-rw-r--r-- 1 ubuntu ubuntu 0 Oct 16 16:11 .sudo_as_admin_successful
-rw------- 1 ubuntu ubuntu 1144 Oct 28 00:43 .viminfo
u1001@f2-vm:/$ ls -al /mnt/
total 28
drwxr-xr-x 2 u1001 u1001 4096 Oct 28 22:07 .
drwxr-xr-x 29 root root 4096 Oct 28 22:01 ..
-rw------- 1 u1001 u1001 3154 Oct 28 22:12 .bash_history
-rw-r--r-- 1 u1001 u1001 220 Feb 25 2020 .bash_logout
-rw-r--r-- 1 u1001 u1001 3771 Feb 25 2020 .bashrc
-rw-r--r-- 1 u1001 u1001 807 Feb 25 2020 .profile
-rw-r--r-- 1 u1001 u1001 0 Oct 16 16:11 .sudo_as_admin_successful
-rw------- 1 u1001 u1001 1144 Oct 28 00:43 .viminfo
u1001@f2-vm:/$ touch /mnt/my-file
u1001@f2-vm:/$ setfacl -m u:1001:rwx /mnt/my-file
u1001@f2-vm:/$ sudo setcap -n 1001 cap_net_raw+ep /mnt/my-file
u1001@f2-vm:/$ ls -al /mnt/my-file
-rw-rwxr--+ 1 u1001 u1001 0 Oct 28 22:14 /mnt/my-file
u1001@f2-vm:/$ ls -al /home/ubuntu/my-file
-rw-rwxr--+ 1 ubuntu ubuntu 0 Oct 28 22:14 /home/ubuntu/my-file
u1001@f2-vm:/$ getfacl /mnt/my-file
getfacl: Removing leading '/' from absolute path names
# file: mnt/my-file
# owner: u1001
# group: u1001
user::rw-
user:u1001:rwx
group::rw-
mask::rwx
other::r--
u1001@f2-vm:/$ getfacl /home/ubuntu/my-file
getfacl: Removing leading '/' from absolute path names
# file: home/ubuntu/my-file
# owner: ubuntu
# group: ubuntu
user::rw-
user:ubuntu:rwx
group::rw-
mask::rwx
other::r--
2. Create mapping of the whole ext4 rootfs without a mapping for uid and gid 0
ubuntu@f2-vm:~$ sudo /mount-idmapped --map-mount b:1:1:65536 / /mnt/
ubuntu@f2-vm:~$ findmnt | grep mnt
└─/mnt /dev/sda2 ext4 rw,relatime
└─/mnt/mnt /dev/sda2 ext4 rw,relatime
ubuntu@f2-vm:~$ sudo mkdir /AS-ROOT-CAN-CREATE
ubuntu@f2-vm:~$ sudo mkdir /mnt/AS-ROOT-CANT-CREATE
mkdir: cannot create directory ‘/mnt/AS-ROOT-CANT-CREATE’: Value too large for defined data type
ubuntu@f2-vm:~$ mkdir /mnt/home/ubuntu/AS-USER-1000-CAN-CREATE
3. Create a vfat usb mount and expose to user 1001 and 5000
ubuntu@f2-vm:/$ sudo mount /dev/sdb /mnt
ubuntu@f2-vm:/$ findmnt | grep mnt
└─/mnt /dev/sdb vfat rw,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed,errors=remount-ro
ubuntu@f2-vm:/$ ls -al /mnt
total 12
drwxr-xr-x 2 root root 4096 Jan 1 1970 .
drwxr-xr-x 34 root root 4096 Oct 28 22:24 ..
-rwxr-xr-x 1 root root 4 Oct 28 03:44 aaa
-rwxr-xr-x 1 root root 0 Oct 28 01:09 bbb
ubuntu@f2-vm:/$ sudo /mount-idmapped --map-mount b:0:1001:1 /mnt /mnt-1001/
ubuntu@f2-vm:/$ ls -al /mnt-1001/
total 12
drwxr-xr-x 2 u1001 u1001 4096 Jan 1 1970 .
drwxr-xr-x 34 root root 4096 Oct 28 22:24 ..
-rwxr-xr-x 1 u1001 u1001 4 Oct 28 03:44 aaa
-rwxr-xr-x 1 u1001 u1001 0 Oct 28 01:09 bbb
ubuntu@f2-vm:/$ sudo /mount-idmapped --map-mount b:0:5000:1 /mnt /mnt-5000/
ubuntu@f2-vm:/$ ls -al /mnt-5000/
total 12
drwxr-xr-x 2 5000 5000 4096 Jan 1 1970 .
drwxr-xr-x 34 root root 4096 Oct 28 22:24 ..
-rwxr-xr-x 1 5000 5000 4 Oct 28 03:44 aaa
-rwxr-xr-x 1 5000 5000 0 Oct 28 01:09 bbb
4. Create an idmapped rootfs mount for a container
root@f2-vm:~# ls -al /var/lib/lxc/f2/rootfs/
total 68
drwxr-xr-x 17 20000 20000 4096 Sep 24 07:48 .
drwxrwx--- 3 20000 20000 4096 Oct 16 19:26 ..
lrwxrwxrwx 1 20000 20000 7 Sep 24 07:43 bin -> usr/bin
drwxr-xr-x 2 20000 20000 4096 Apr 15 2020 boot
drwxr-xr-x 3 20000 20000 4096 Oct 16 19:26 dev
drwxr-xr-x 61 20000 20000 4096 Oct 16 19:26 etc
drwxr-xr-x 3 20000 20000 4096 Sep 24 07:45 home
lrwxrwxrwx 1 20000 20000 7 Sep 24 07:43 lib -> usr/lib
lrwxrwxrwx 1 20000 20000 9 Sep 24 07:43 lib32 -> usr/lib32
lrwxrwxrwx 1 20000 20000 9 Sep 24 07:43 lib64 -> usr/lib64
lrwxrwxrwx 1 20000 20000 10 Sep 24 07:43 libx32 -> usr/libx32
drwxr-xr-x 2 20000 20000 4096 Sep 24 07:43 media
drwxr-xr-x 2 20000 20000 4096 Sep 24 07:43 mnt
drwxr-xr-x 2 20000 20000 4096 Sep 24 07:43 opt
drwxr-xr-x 2 20000 20000 4096 Apr 15 2020 proc
drwx------ 2 20000 20000 4096 Sep 24 07:43 root
drwxr-xr-x 2 20000 20000 4096 Sep 24 07:45 run
lrwxrwxrwx 1 20000 20000 8 Sep 24 07:43 sbin -> usr/sbin
drwxr-xr-x 2 20000 20000 4096 Sep 24 07:43 srv
drwxr-xr-x 2 20000 20000 4096 Apr 15 2020 sys
drwxrwxrwt 2 20000 20000 4096 Sep 24 07:44 tmp
drwxr-xr-x 13 20000 20000 4096 Sep 24 07:43 usr
drwxr-xr-x 12 20000 20000 4096 Sep 24 07:44 var
root@f2-vm:~# /mount-idmapped --map-mount b:20000:10000:100000 /var/lib/lxc/f2/rootfs/ /mnt
root@f2-vm:~# ls -al /mnt
total 68
drwxr-xr-x 17 10000 10000 4096 Sep 24 07:48 .
drwxr-xr-x 34 root root 4096 Oct 28 22:24 ..
lrwxrwxrwx 1 10000 10000 7 Sep 24 07:43 bin -> usr/bin
drwxr-xr-x 2 10000 10000 4096 Apr 15 2020 boot
drwxr-xr-x 3 10000 10000 4096 Oct 16 19:26 dev
drwxr-xr-x 61 10000 10000 4096 Oct 16 19:26 etc
drwxr-xr-x 3 10000 10000 4096 Sep 24 07:45 home
lrwxrwxrwx 1 10000 10000 7 Sep 24 07:43 lib -> usr/lib
lrwxrwxrwx 1 10000 10000 9 Sep 24 07:43 lib32 -> usr/lib32
lrwxrwxrwx 1 10000 10000 9 Sep 24 07:43 lib64 -> usr/lib64
lrwxrwxrwx 1 10000 10000 10 Sep 24 07:43 libx32 -> usr/libx32
drwxr-xr-x 2 10000 10000 4096 Sep 24 07:43 media
drwxr-xr-x 2 10000 10000 4096 Sep 24 07:43 mnt
drwxr-xr-x 2 10000 10000 4096 Sep 24 07:43 opt
drwxr-xr-x 2 10000 10000 4096 Apr 15 2020 proc
drwx------ 2 10000 10000 4096 Sep 24 07:43 root
drwxr-xr-x 2 10000 10000 4096 Sep 24 07:45 run
lrwxrwxrwx 1 10000 10000 8 Sep 24 07:43 sbin -> usr/sbin
drwxr-xr-x 2 10000 10000 4096 Sep 24 07:43 srv
drwxr-xr-x 2 10000 10000 4096 Apr 15 2020 sys
drwxrwxrwt 2 10000 10000 4096 Sep 24 07:44 tmp
drwxr-xr-x 13 10000 10000 4096 Sep 24 07:43 usr
drwxr-xr-x 12 10000 10000 4096 Sep 24 07:44 var
root@f2-vm:~# lxc-start f2 # uses /mnt as rootfs
root@f2-vm:~# lxc-attach f2 -- cat /proc/1/uid_map
0 10000 10000
root@f2-vm:~# lxc-attach f2 -- cat /proc/1/gid_map
0 10000 10000
root@f2-vm:~# lxc-attach f2 -- ls -al /
total 52
drwxr-xr-x 17 root root 4096 Sep 24 07:48 .
drwxr-xr-x 17 root root 4096 Sep 24 07:48 ..
lrwxrwxrwx 1 root root 7 Sep 24 07:43 bin -> usr/bin
drwxr-xr-x 2 root root 4096 Apr 15 2020 boot
drwxr-xr-x 5 root root 500 Oct 28 23:39 dev
drwxr-xr-x 61 root root 4096 Oct 28 23:39 etc
drwxr-xr-x 3 root root 4096 Sep 24 07:45 home
lrwxrwxrwx 1 root root 7 Sep 24 07:43 lib -> usr/lib
lrwxrwxrwx 1 root root 9 Sep 24 07:43 lib32 -> usr/lib32
lrwxrwxrwx 1 root root 9 Sep 24 07:43 lib64 -> usr/lib64
lrwxrwxrwx 1 root root 10 Sep 24 07:43 libx32 -> usr/libx32
drwxr-xr-x 2 root root 4096 Sep 24 07:43 media
drwxr-xr-x 2 root root 4096 Sep 24 07:43 mnt
drwxr-xr-x 2 root root 4096 Sep 24 07:43 opt
dr-xr-xr-x 232 nobody nogroup 0 Oct 28 23:39 proc
drwx------ 2 root root 4096 Oct 28 23:41 root
drwxr-xr-x 12 root root 360 Oct 28 23:39 run
lrwxrwxrwx 1 root root 8 Sep 24 07:43 sbin -> usr/sbin
drwxr-xr-x 2 root root 4096 Sep 24 07:43 srv
dr-xr-xr-x 13 nobody nogroup 0 Oct 28 23:39 sys
drwxrwxrwt 11 root root 4096 Oct 28 23:40 tmp
drwxr-xr-x 13 root root 4096 Sep 24 07:43 usr
drwxr-xr-x 12 root root 4096 Sep 24 07:44 var
root@f2-vm:~# lxc-attach f2 -- ls -al /my-file
-rw-r--r-- 1 root root 0 Oct 28 23:43 /my-file
root@f2-vm:~# ls -al /var/lib/lxc/f2/rootfs/my-file
-rw-r--r-- 1 20000 20000 0 Oct 28 23:43 /var/lib/lxc/f2/rootfs/my-file
[1]: https://systemd.io/HOME_DIRECTORY/
"If the UID assigned to a user does not match the owner of the home
directory in the file system, the home directory is automatically
and recursively chown()ed to the correct UID."
This has huge performance impact and is also problematic since it
chowns all files independent of ownership.
[2]: https://github.com/brauner/mount-idmapped
In no particular order I'd like to say thanks to:
Al for pointing me into the direction to avoid inode alias issues during
lookup. David for various discussions around this. Tycho for helping
with this series and on future patches if this is in any shape or form
acceptable. Alban Crequy for pointing out more application container
use-cases. Stéphane for various valuable input on various use-cases and
letting me work on this. Amir for explaining and discussing aspects of
overlayfs with me.
I'd like to especially thank Seth Forshee because he provided a lot of
good analysis, suggestions, and participated in short-notice discussions
in both chat and video.
This series can be found and pulled in three locations:
https://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git/log/?h=...
https://github.com/brauner/linux/tree/idmapped_mounts
https://gitlab.com/brauner/linux/-/commits/idmapped_mounts
Thanks!
Christian
Christian Brauner (32):
namespace: take lock_mount_hash() directly when changing flags
namespace: only take read lock in do_reconfigure_mnt()
fs: add mount_setattr()
tests: add mount_setattr() selftests
fs: introduce MOUNT_ATTR_IDMAP
fs: add id translation helpers
capability: handle idmapped mounts
namei: add idmapped mount aware permission helpers
inode: add idmapped mount aware init and permission helpers
attr: handle idmapped mounts
acl: handle idmapped mounts
commoncap: handle idmapped mounts
stat: add mapped_generic_fillattr()
namei: handle idmapped mounts in may_*() helpers
namei: introduce struct renamedata
namei: prepare for idmapped mounts
namei: add lookup helpers with idmapped mounts aware permission
checking
open: handle idmapped mounts in do_truncate()
open: handle idmapped mounts
af_unix: handle idmapped mounts
utimes: handle idmapped mounts
would_dump: handle idmapped mounts
exec: handle idmapped mounts
fs: add helpers for idmap mounts
apparmor: handle idmapped mounts
audit: handle idmapped mounts
ima: handle idmapped mounts
ext4: support idmapped mounts
expfs: handle idmapped mounts
overlayfs: handle idmapped lower directories
overlayfs: handle idmapped merged mounts
fat: handle idmapped mounts
Tycho Andersen (2):
xattr: handle idmapped mounts
selftests: add idmapped mounts xattr selftest
arch/alpha/kernel/syscalls/syscall.tbl | 1 +
arch/arm/tools/syscall.tbl | 1 +
arch/arm64/include/asm/unistd32.h | 2 +
arch/ia64/kernel/syscalls/syscall.tbl | 1 +
arch/m68k/kernel/syscalls/syscall.tbl | 1 +
arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 1 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 1 +
arch/parisc/kernel/syscalls/syscall.tbl | 1 +
arch/powerpc/kernel/syscalls/syscall.tbl | 1 +
arch/s390/kernel/syscalls/syscall.tbl | 1 +
arch/sh/kernel/syscalls/syscall.tbl | 1 +
arch/sparc/kernel/syscalls/syscall.tbl | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
arch/xtensa/kernel/syscalls/syscall.tbl | 1 +
fs/Kconfig | 6 +
fs/attr.c | 142 ++-
fs/coredump.c | 12 +-
fs/exec.c | 12 +-
fs/exportfs/expfs.c | 4 +-
fs/ext4/acl.c | 11 +-
fs/ext4/acl.h | 3 +
fs/ext4/ext4.h | 14 +-
fs/ext4/file.c | 4 +
fs/ext4/ialloc.c | 7 +-
fs/ext4/inode.c | 27 +-
fs/ext4/ioctl.c | 18 +-
fs/ext4/namei.c | 145 ++-
fs/ext4/super.c | 4 +
fs/ext4/symlink.c | 9 +
fs/ext4/xattr_hurd.c | 22 +-
fs/ext4/xattr_security.c | 18 +-
fs/ext4/xattr_trusted.c | 18 +-
fs/fat/fat.h | 2 +
fs/fat/file.c | 27 +-
fs/fat/namei_msdos.c | 7 +
fs/fat/namei_vfat.c | 7 +
fs/inode.c | 66 +-
fs/internal.h | 9 +
fs/namei.c | 597 ++++++++----
fs/namespace.c | 446 ++++++++-
fs/open.c | 52 +-
fs/overlayfs/copy_up.c | 104 +-
fs/overlayfs/dir.c | 219 +++--
fs/overlayfs/export.c | 3 +-
fs/overlayfs/file.c | 23 +-
fs/overlayfs/inode.c | 121 ++-
fs/overlayfs/namei.c | 64 +-
fs/overlayfs/overlayfs.h | 158 +++-
fs/overlayfs/ovl_entry.h | 1 +
fs/overlayfs/readdir.c | 34 +-
fs/overlayfs/super.c | 109 ++-
fs/overlayfs/util.c | 38 +-
fs/posix_acl.c | 130 ++-
fs/stat.c | 18 +-
fs/utimes.c | 4 +-
fs/xattr.c | 264 ++++--
include/linux/audit.h | 10 +-
include/linux/capability.h | 12 +-
include/linux/fs.h | 254 ++++-
include/linux/ima.h | 15 +-
include/linux/lsm_hook_defs.h | 10 +-
include/linux/lsm_hooks.h | 1 +
include/linux/mount.h | 20 +-
include/linux/namei.h | 6 +
include/linux/posix_acl.h | 14 +-
include/linux/posix_acl_xattr.h | 12 +-
include/linux/security.h | 36 +-
include/linux/syscalls.h | 3 +
include/linux/xattr.h | 29 +
include/uapi/asm-generic/unistd.h | 4 +-
include/uapi/linux/mount.h | 26 +
ipc/mqueue.c | 8 +-
kernel/auditsc.c | 29 +-
kernel/capability.c | 22 +-
net/unix/af_unix.c | 2 +-
security/apparmor/domain.c | 9 +-
security/apparmor/file.c | 5 +-
security/apparmor/lsm.c | 12 +-
security/commoncap.c | 50 +-
security/integrity/ima/ima.h | 19 +-
security/integrity/ima/ima_api.c | 10 +-
security/integrity/ima/ima_appraise.c | 14 +-
security/integrity/ima/ima_asymmetric_keys.c | 2 +-
security/integrity/ima/ima_main.c | 28 +-
security/integrity/ima/ima_policy.c | 17 +-
security/integrity/ima/ima_queue_keys.c | 2 +-
security/security.c | 18 +-
security/selinux/hooks.c | 13 +-
security/smack/smack_lsm.c | 11 +-
tools/include/uapi/asm-generic/unistd.h | 4 +-
tools/testing/selftests/Makefile | 1 +
.../testing/selftests/idmap_mounts/.gitignore | 1 +
tools/testing/selftests/idmap_mounts/Makefile | 8 +
tools/testing/selftests/idmap_mounts/config | 1 +
tools/testing/selftests/idmap_mounts/xattr.c | 389 ++++++++
.../selftests/mount_setattr/.gitignore | 1 +
.../testing/selftests/mount_setattr/Makefile | 7 +
tools/testing/selftests/mount_setattr/config | 1 +
.../mount_setattr/mount_setattr_test.c | 888 ++++++++++++++++++
102 files changed, 4109 insertions(+), 912 deletions(-)
create mode 100644 tools/testing/selftests/idmap_mounts/.gitignore
create mode 100644 tools/testing/selftests/idmap_mounts/Makefile
create mode 100644 tools/testing/selftests/idmap_mounts/config
create mode 100644 tools/testing/selftests/idmap_mounts/xattr.c
create mode 100644 tools/testing/selftests/mount_setattr/.gitignore
create mode 100644 tools/testing/selftests/mount_setattr/Makefile
create mode 100644 tools/testing/selftests/mount_setattr/config
create mode 100644 tools/testing/selftests/mount_setattr/mount_setattr_test.c
base-commit: 3650b228f83adda7e5ee532e2b90429c03f7b9ec
--
2.29.0
4 years, 1 month
[PATCH v21 00/23] LSM: Module stacking for AppArmor
by Casey Schaufler
This patchset provides the changes required for
the AppArmor security module to stack safely with any other.
v21: Rebase to 5.9-rc4
Incorporate feedback from v20
- Further revert UDS SO_PEERSEC to use scaffolding around
the interfaces that use lsmblobs and store only a single
secid. The possibility of multiple security modules
requiring data here is still a future problem.
- Incorporate Richard Guy Briggs' non-syscall auxiliary
records patch (patch 0019-0021) in place of my "supplimental"
records implementation. [I'm not sure I've given proper
attestation. I will correct as appropriate]
v20: Rebase to 5.9-rc1
Change the BPF security module to use the lsmblob data. (patch 0002)
Repair length logic in subject label processing (patch 0015)
Handle -EINVAL from the empty BPF setprocattr hook (patch 0020)
Correct length processing in append_ctx() (patch 0022)
v19: Rebase to 5.8-rc6
Incorporate feedback from v18
- Revert UDS SO_PEERSEC implementation to use lsmblobs
directly, rather than allocating as needed. The correct
treatment of out-of-memory conditions in the later case
is difficult to define. (patch 0005)
- Use a size_t in append_ctx() (patch 0021)
- Fix a memory leak when creating compound contexts. (patch 0021)
Fix build error when CONFIG_SECURITY isn't set (patch 0013)
Fix build error when CONFIG_SECURITY isn't set (patch 0020)
Fix build error when CONFIG_SECURITY isn't set (patch 0021)
v18: Rebase to 5.8-rc3
Incorporate feedback from v17
- Null pointer checking in UDS (patch 0005)
Match changes in IMA code (patch 0012)
Fix the behavior of LSM context supplimental audit
records so that there's always exactly one when it's
appropriate for there to be one. This is a substantial
change that requires extention of the audit_context beyond
syscall events. (patch 0020)
v17: Rebase to 5.7-rc4
v16: Rebase to 5.6
Incorporate feedback from v15 - Thanks Stephen, Mimi and Paul
- Generally improve commit messages WRT scaffolding
- Comment ima_lsm_isset() (patch 0002)
- Some question may remain on IMA warning (patch 0002)
- Mark lsm_slot as __lsm_ro_after_init not __init_data (patch 0002)
- Change name of lsmblob variable in ima_match_rules() (patch 0003)
- Instead of putting a struct lsmblob into the unix_skb_parms
structure put a pointer to an allocated instance. There is
currently only space for 5 u32's in unix_skb_parms and it is
likely to get even tighter. Fortunately, the lifecycle
management of the allocated lsmblob is simple. (patch 0005)
- Dropped Acks due to the above change (patch 0005)
- Improved commentary on secmark labeling scaffolding. (patch 0006)
- Reduced secmark related labeling scaffolding. (patch 0006)
- Replace use of the zeroth entry of an lsmblob in scaffolding
with a function lsmblob_value() to hopefully make it less
obscure. (patch 0006)
- Convert security_secmark_relabel_packet to use lsmblob as
this reduces much of the most contentious scaffolding. (patch 0006)
- Dropped Acks due to the above change (patch 0006)
- Added BUILD_BUG_ON() for CIPSO tag 6. (patch 0018)
- Reworked audit subject information. Instead of adding fields in
the middle of existing records add a new record to the event. When
a separate record is required use subj="?". (patch 0020)
- Dropped Acks due to the above change (patch 0020)
- Reworked audit object information. Instead of adding fields in
the middle of existing records add a new record to the event. When
a separate record is required use obj="?". (patch 0021)
- Dropped Acks due to the above change (patch 0021)
- Enhanced documentation (patch 0022)
- Removed unnecessary error code check in security_getprocattr()
(patch 0021)
v15: Rebase to 5.6-rc1
- Revise IMA data use (patch 0002)
Incorporate feedback from v14
- Fix lockdown module registration naming (patch 0002)
- Revise how /proc/self/attr/context is gathered. (patch 0022)
- Revise access modes on /proc/self/attr/context. (patch 0022)
- Revise documentation on LSM external interfaces. (patch 0022)
v14: Rebase to 5.5-rc5
Incorporate feedback from v13
- Use an array of audit rules (patch 0002)
- Significant change, removed Acks (patch 0002)
- Remove unneeded include (patch 0013)
- Use context.len correctly (patch 0015)
- Reorder code to be more sensible (patch 0016)
- Drop SO_PEERCONTEXT as it's not needed yet (patch 0023)
v13: Rebase to 5.5-rc2
Incorporate feedback from v12
- Print lsmblob size with %z (Patch 0002)
- Convert lockdown LSM initialization. (Patch 0002)
- Restore error check in nft_secmark_compute_secid (Patch 0006)
- Correct blob scaffolding in ima_must_appraise() (Patch 0009)
- Make security_setprocattr() clearer (Patch 0013)
- Use lsm_task_display more widely (Patch 0013)
- Use passed size in lsmcontext_init() (Patch 0014)
- Don't add a smack_release_secctx() hook (Patch 0014)
- Don't print warning in security_release_secctx() (Patch 0014)
- Don't duplicate the label in nfs4_label_init_security() (Patch 0016)
- Remove reviewed-by as code has significant change (Patch 0016)
- Send the entire lsmblob for Tag 6 (Patch 0019)
- Fix description of socket_getpeersec_stream parameters (Patch 0023)
- Retain LSMBLOB_FIRST. What was I thinking? (Patch 0023)
- Add compound context to LSM documentation (Patch 0023)
v12: Rebase to 5.5-rc1
Fixed a couple of incorrect contractions in the text.
v11: Rebase to 5.4-rc6
Incorporate feedback from v10
- Disambiguate reading /proc/.../attr/display by restricting
all use of the interface to the current process.
- Fix a merge error in AppArmor's display attribute check
v10: Ask the security modules if the display can be changed.
v9: There is no version 9
v8: Incorporate feedback from v7
- Minor clean-up in display value management
- refactor "compound" context creation to use a common
append_ctx() function.
v7: Incorporate feedback from v6
- Make setting the display a privileged operation. The
availability of compound contexts reduces the need for
setting the display.
v6: Incorporate feedback from v5
- Add subj_<lsm>= and obj_<lsm>= fields to audit records
- Add /proc/.../attr/context to get the full context in
lsmname\0value\0... format as suggested by Simon McVittie
- Add SO_PEERCONTEXT for getsockopt() to get the full context
in the same format, also suggested by Simon McVittie.
- Add /sys/kernel/security/lsm_display_default to provide
the display default value.
v5: Incorporate feedback from v4
- Initialize the lsmcontext in security_secid_to_secctx()
- Clear the lsmcontext in all security_release_secctx() cases
- Don't use the "display" on strictly internal context
interfaces.
- The SELinux binder hooks check for cases where the context
"display" isn't compatible with SELinux.
v4: Incorporate feedback from v3
- Mark new lsm_<blob>_alloc functions static
- Replace the lsm and slot fields of the security_hook_list
with a pointer to a LSM allocated lsm_id structure. The
LSM identifies if it needs a slot explicitly. Use the
lsm_id rather than make security_add_hooks return the
slot value.
- Validate slot values used in security.c
- Reworked the "display" process attribute handling so that
it works right and doesn't use goofy list processing.
- fix display value check in dentry_init_security
- Replace audit_log of secids with '?' instead of deleting
the audit log
v3: Incorporate feedback from v2
- Make lsmblob parameter and variable names more
meaningful, changing "le" and "l" to "blob".
- Improve consistency of constant naming.
- Do more sanity checking during LSM initialization.
- Be a bit clearer about what is temporary scaffolding.
- Rather than clutter security_getpeersec_dgram with
otherwise unnecessary checks remove the apparmor
stub, which does nothing useful.
Patch 0001 moves management of the sock security blob
from the individual modules to the infrastructure.
Patches 0002-0011 replace system use of a "secid" with
a structure "lsmblob" containing information from the
security modules to be held and reused later. At this
point lsmblob contains an array of u32 secids, one "slot"
for each of the security modules compiled into the
kernel that used secids. A "slot" is allocated when
a security module requests one.
The infrastructure is changed to use the slot number
to pass the correct secid to or from the security module
hooks.
It is important that the lsmblob be a fixed size entity
that does not have to be allocated. Several of the places
where it is used would have performance and/or locking
issues with dynamic allocation.
Patch 0012 provides a mechanism for a process to
identify which security module's hooks should be used
when displaying or converting a security context string.
A new interface /proc/self/attr/display contains the name
of the security module to show. Reading from this file
will present the name of the module, while writing to
it will set the value. Only names of active security
modules are accepted. Internally, the name is translated
to the appropriate "slot" number for the module which
is then stored in the task security blob. Setting the
display requires that all modules using the /proc interfaces
allow the transition. The "display" of other processess
can be neither read nor written. All suggested cases
for reading the display of a different process have race
conditions.
Patch 0013 Starts the process of changing how a security
context is represented. Since it is possible for a
security context to have been generated by more than one
security module it is now necessary to note which module
created a security context so that the correct "release"
hook can be called. There are several places where the
module that created a security context cannot be inferred.
This is achieved by introducing a "lsmcontext" structure
which contains the context string, its length and the
"slot" number of the security module that created it.
The security_release_secctx() interface is changed,
replacing the (string,len) pointer pair with a lsmcontext
pointer.
Patches 0014-0016 convert the security interfaces from
(string,len) pointer pairs to a lsmcontext pointer.
The slot number identifying the creating module is
added by the infrastructure. Where the security context
is stored for extended periods the data type is changed.
The Netlabel code is converted to save lsmblob structures
instead of secids in Patch 0017. This is not strictly
necessary as there can only be one security module that
uses Netlabel at this point. Using a lsmblob is much
cleaner, as the interfaces that use the data have all
been converted.
Patch 0018 adds checks to the binder hooks which verify
that both ends of a transaction use the same "display".
Patches 0019-0021 add addition audit records for subject
and object LSM data when there are multiple security modules
with such data. The AUDIT_MAC_TASK_CONTEXTS record is
used in conjuction with a "subj=?" field to identify the
subject data. The AUDIT_MAC_OBJ_CONTEXTS record is used in
conjuction with a "obj=?" field to identify the object data.
The AUDIT_MAC_TASK_CONTEXTS record identifies the security
module with the data: "subj_selinux=xyz_t subj_apparmor=abc".
The AUDIT_MAC_OBJ_CONTEXTS record identifies the security
module with the data: "obj_selinux=xyz_t obj_apparmor=abc".
While AUDIT_MAC_TASK_CONTEXTS records will always contain
an entry for each possible security modules, AUDIT_MAC_OBJ_CONTEXTS
records will only contain entries for security modules for
which the object in question has data.
An example of the MAC_TASK_CONTEXTS (1420) record is:
type=UNKNOWN[1420]
msg=audit(1600880931.832:113)
subj_apparmor==unconfined
subj_smack=_
An example of the MAC_OBJ_CONTEXTS (1421) record is:
type=UNKNOWN[1421]
msg=audit(1601152467.009:1050):
obj_selinux=unconfined_u:object_r:user_home_t:s0
Patch 0022 adds a new interface for getting the
compound security contexts, /proc/self/attr/context.
An example of the content of this file is:
selinux\0one_u:one_r:one_t:s0-s0:c0.c1023\0apparmor\0unconfined\0
Finally, with all interference on the AppArmor hooks
removed, Patch 0023 removes the exclusive bit from
AppArmor. An unnecessary stub hook was also removed.
The Ubuntu project is using an earlier version of
this patchset in their distribution to enable stacking
for containers.
Performance measurements to date have the change
within the "noise". The sockperf and dbench results
are on the order of 0.2% to 0.8% difference, with
better performance being as common as worse. The
benchmarks were run with AppArmor and Smack on Ubuntu.
https://github.com/cschaufler/lsm-stacking.git#stack-5.9-rc4-v21
Signed-off-by: Casey Schaufler <casey(a)schaufler-ca.com>
---
4 years, 1 month
[PATCH ghak90 V9 00/13] audit: implement container identifier
by Richard Guy Briggs
Implement kernel audit container identifier.
This patchset is an eighth based on the proposal document (V4) posted:
https://www.redhat.com/archives/linux-audit/2019-September/msg00052.html
The first patch was the last patch from ghak81 that was absorbed into
this patchset since its primary justification is the rest of this
patchset.
The second patch implements the proc fs write to set the audit container
identifier of a process, emitting an AUDIT_CONTAINER_OP record to
announce the registration of that audit container identifier on that
process. This patch requires userspace support for record acceptance
and proper type display. This patch now includes the conversion
over from a simple u64 to a list member that includes owner information
to check for descendancy, allow process injection into a container and
prevent id reuse by other orchestrators.
The third implements reading the audit container identifier from the
proc filesystem for debugging. This patch wasn't planned for upstream
inclusion but is starting to become more likely.
The fourth logs the drop of an audit container identifier once all tasks
using that audit container identifier have exited.
The 5th implements the auxiliary record AUDIT_CONTAINER_ID if an audit
container identifier is associated with an event. This patch requires
userspace support for proper type display.
The 6th adds audit daemon signalling provenance through audit_sig_info2.
The 7th creates a local audit context to be able to bind a standalone
record with a locally created auxiliary record.
The 8th patch adds audit container identifier records to the user
standalone records.
The 9th adds audit container identifier filtering to the exit,
exclude and user lists. This patch adds the AUDIT_CONTID field and
requires auditctl userspace support for the --contid option.
The 10th adds network namespace audit container identifier labelling
based on member tasks' audit container identifier labels which supports
standalone netfilter records that don't have a task context and lists
each container to which that net namespace belongs.
The 11th checks that the target is a descendant for nesting and
refactors to avoid a duplicate of the copied function.
The 12th adds tracking and reporting for container nesting.
This enables kernel filtering and userspace searches of nested audit
container identifiers.
The 13th adds a mechanism to allow a process to be designated as a
container orchestrator/engine in non-init user namespaces.
Example: Set an audit container identifier of 123456 to the "sleep" task:
sleep 2&
child=$!
echo 123456 > /proc/$child/audit_containerid; echo $?
ausearch -ts recent -m container_op
echo child:$child contid:$( cat /proc/$child/audit_containerid)
This should produce a record such as:
type=CONTAINER_OP msg=audit(2018-06-06 12:39:29.636:26949) : op=set opid=2209 contid=123456 old-contid=18446744073709551615
Example: Set a filter on an audit container identifier 123459 on /tmp/tmpcontainerid:
contid=123459
key=tmpcontainerid
auditctl -a exit,always -F dir=/tmp -F perm=wa -F contid=$contid -F key=$key
perl -e "sleep 1; open(my \$tmpfile, '>', \"/tmp/$key\"); close(\$tmpfile);" &
child=$!
echo $contid > /proc/$child/audit_containerid
sleep 2
ausearch -i -ts recent -k $key
auditctl -d exit,always -F dir=/tmp -F perm=wa -F contid=$contid -F key=$key
rm -f /tmp/$key
This should produce an event such as:
type=CONTAINER_ID msg=audit(2018-06-06 12:46:31.707:26953) : contid=123459
type=PROCTITLE msg=audit(2018-06-06 12:46:31.707:26953) : proctitle=perl -e sleep 1; open(my $tmpfile, '>', "/tmp/tmpcontainerid"); close($tmpfile);
type=PATH msg=audit(2018-06-06 12:46:31.707:26953) : item=1 name=/tmp/tmpcontainerid inode=25656 dev=00:26 mode=file,644 ouid=root ogid=root rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE cap_fp=none cap_fi=none cap_fe=0 cap_fver=0
type=PATH msg=audit(2018-06-06 12:46:31.707:26953) : item=0 name=/tmp/ inode=8985 dev=00:26 mode=dir,sticky,777 ouid=root ogid=root rdev=00:00 obj=system_u:object_r:tmp_t:s0 nametype=PARENT cap_fp=none cap_fi=none cap_fe=0 cap_fver=0
type=CWD msg=audit(2018-06-06 12:46:31.707:26953) : cwd=/root
type=SYSCALL msg=audit(2018-06-06 12:46:31.707:26953) : arch=x86_64 syscall=openat success=yes exit=3 a0=0xffffffffffffff9c a1=0x5621f2b81900 a2=O_WRONLY|O_CREAT|O_TRUNC a3=0x1b6 items=2 ppid=628 pid=2232 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=ttyS0 ses=1 comm=perl exe=/usr/bin/perl subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=tmpcontainerid
Example: Test multiple containers on one netns:
sleep 5 &
child1=$!
containerid1=123451
echo $containerid1 > /proc/$child1/audit_containerid
sleep 5 &
child2=$!
containerid2=123452
echo $containerid2 > /proc/$child2/audit_containerid
iptables -I INPUT -i lo -p icmp --icmp-type echo-request -j AUDIT --type accept
iptables -I INPUT -t mangle -i lo -p icmp --icmp-type echo-request -j MARK --set-mark 0x12345555
sleep 1;
bash -c "ping -q -c 1 127.0.0.1 >/dev/null 2>&1"
sleep 1;
ausearch -i -m NETFILTER_PKT -ts boot|grep mark=0x12345555
ausearch -i -m NETFILTER_PKT -ts boot|grep contid=|grep $containerid1|grep $containerid2
This would produce an event such as:
type=NETFILTER_PKT msg=audit(03/15/2019 14:16:13.369:244) : mark=0x12345555 saddr=127.0.0.1 daddr=127.0.0.1 proto=icmp
type=CONTAINER_ID msg=audit(03/15/2019 14:16:13.369:244) : contid=123452,123451
Includes the last patch of https://github.com/linux-audit/audit-kernel/issues/81
Please see the github audit kernel issue for the main feature:
https://github.com/linux-audit/audit-kernel/issues/90
and the kernel filter code:
https://github.com/linux-audit/audit-kernel/issues/91
and the network support:
https://github.com/linux-audit/audit-kernel/issues/92
Please see the github audit userspace issue for supporting record types:
https://github.com/linux-audit/audit-userspace/issues/51
and filter code:
https://github.com/linux-audit/audit-userspace/issues/40
Please see the github audit testsuiite issue for the test case:
https://github.com/linux-audit/audit-testsuite/issues/64
https://github.com/rgbriggs/audit-testsuite/tree/ghat64-contid
https://githu.com/linux-audit/audit-testsuite/pull/91
Please see the github audit wiki for the feature overview:
https://github.com/linux-audit/audit-kernel/wiki/RFE-Audit-Container-ID
The code is also posted at:
git://toccata2.tricolour.ca/linux-2.6-rgb.git ghak90-audit-containerID.v9
Changelog:
v9
- rebase on v5.8-rc1
- fix whitespace and oversize lines where practicable
- remove harmless duplicate S_IRUSR in capcontid
- return -EBUSY for both threading and children (drop -EALREADY)
- return -EEXIST if already set and not nesting (drop -ECHILD)
- fix unbalanced brace and remove elseif ladder
- drop check for same contid set again as redundant (drop -EADDRINUSE)
- get reference to contobj's parent taskstruct
- protect all contid list updates with audit_contobj_list_lock
- protect refcounts with rcu read lock
- convert _audit_contobj to _audit_contobj_get, which calls _audit_contobj_hold
- convert audit_log_container_id() and audit_log_contid() from u64 to contobj, simplifying
- issue death certificate on contid after exit of last task
- keep contobj ref to block reuse with -ESHUTDOWN until auditd exit or signal info
- report all contids nested
- rework sig_info2 format to accomodate contid list
- fix zero-length array in include/linux/audit.h struct audit_sig_info2 data[]
- found bug in audit_alloc_local, don't check audit_ever_enabled, since all callers check audit_enabled
- remove warning at declaration of audit_sig_cid of reuse since reuse is now blocked
- report descendancy checking errcodes under -EXDEV (drop -EBADSLT)
- add missed check, replace audit_contid_isowner with audit_contid_isnesting
- limit calls to audit_log_format() with if(iter->parent) ...
- list only one contid in contid, nested in old-contid to avoid duplication
- switch to comma delimiter, carrat modifier in nested contid list
- special case -1 for AUDIT_CID_UNSET printing
- drop contid depth limit and netns contid limit patches
- enforce capcontid policy on contid write and read
- squash conversion to contobj into contid intro patch
v8
- rebase on v5.5-rc1 audit/next
- remove subject attrs in CONTAINER_OP record
- group audit_contid_list_lock with audit_contid_hash
- in audit_{set,log}_contid(), break out of loop after finding target
- use target var to size kmalloc
- rework audit_cont_owner() to bool audit_contid_isowner() and move to where used
- create static void audit_cont_hold(struct audit_contobj *cont) { refcount_inc(&cont->refcount); }
- rename audit_cont{,_*} refs to audit_contobj{,_*}
- prefix special local functions with _ [audit_contobj*()]
- protect contid list traversals with rcu_read_lock() and updates with audit_contid_list_lock
- protect real_parent in audit_contid_depth() with rcu_dereference
- give new contid field nesting format in patch description
- squash task_is_descendant()
- squash support for NETFILTER_PKT into network namespaces
- limit nesting depth based on record length overflow, bandwidth and storage
- implent control for audit container identifier nesting depth limit
- make room for audit_bpf patches (bump CONTAINER_ID to 1335)
- squash proc interface into capcontid
- remove netlink access to loginuid/sessionid/contid/capcontid
- delete 32k contid limit patch
- document potential overlap between signal delivery and contid reuse
- document audit_contobj_list_lock coverage
- document disappearing orch task injection limitation
- limit the number of containers that can be associated with a network namespace
- implent control for audit container identifier netns count limit
v7
- remove BUG() in audit_comparator64()
- rebase on v5.2-rc1 audit/next
- resolve merge conflict with ghak111 (signal_info regardless syscall)
- resolve merge conflict with ghak73 (audit_field_valid)
- resolve merge conflict with ghak64 (saddr_fam filter)
- resolve merge conflict with ghak10 (ntp audit) change AUDIT_CONTAINER_ID from 1332 to 1334
- rebase on v5.3-rc1 audit/next
- track container owner
- only permit setting contid of descendants for nesting
- track drop of contid and permit reuse
- track and report container nesting
- permit filtering on any nested contid
- set/get contid and loginuid/sessionid via netlink
- implement capcontid to enable orchestrators in non-init user
namespaces
- limit number of containers
- limit depth of container nesting
v6
- change TMPBUFLEN from 11 to 21 to cover the decimal value of contid
u64 (nhorman)
- fix bug overwriting ctx in struct audit_sig_info, move cid above
ctx[0] (nhorman)
- fix bug skipping remaining fields and not advancing bufp when copying
out contid in audit_krule_to_data (omosnacec)
- add acks, tidy commit descriptions, other formatting fixes (checkpatch
wrong on audit_log_lost)
- cast ull for u64 prints
- target_cid tracking was moved from the ptrace/signal patch to
container_op
- target ptrace and signal records were moved from the ptrace/signal
patch to container_id
- auditd signaller tracking was moved to a new AUDIT_SIGNAL_INFO2
request and record
- ditch unnecessary list_empty() checks
- check for null net and aunet in audit_netns_contid_add()
- swap CONTAINER_OP contid/old-contid order to ease parsing
v5
- address loginuid and sessionid syscall scope in ghak104
- address audit_context in CONFIG_AUDIT vs CONFIG_AUDITSYSCALL in ghak105
- remove tty patch, addressed in ghak106
- rebase on audit/next v5.0-rc1
w/ghak59/ghak104/ghak103/ghak100/ghak107/ghak105/ghak106/ghak105sup
- update CONTAINER_ID to CONTAINER_OP in patch description
- move audit_context in audit_task_info to CONFIG_AUDITSYSCALL
- move audit_alloc() and audit_free() out of CONFIG_AUDITSYSCALL and into
CONFIG_AUDIT and create audit_{alloc,free}_syscall
- use plain kmem_cache_alloc() rather than kmem_cache_zalloc() in audit_alloc()
- fix audit_get_contid() declaration type error
- move audit_set_contid() from auditsc.c to audit.c
- audit_log_contid() returns void
- audit_log_contid() handed contid rather than tsk
- switch from AUDIT_CONTAINER to AUDIT_CONTAINER_ID for aux record
- move audit_log_contid(tsk/contid) & audit_contid_set(tsk)/audit_contid_valid(contid)
- switch from tsk to current
- audit_alloc_local() calls audit_log_lost() on failure to allocate a context
- add AUDIT_USER* non-syscall contid record
- cosmetic cleanup double parens, goto out on err
- ditch audit_get_ns_contid_list_lock(), fix aunet lock race
- switch from all-cpu read spinlock to rcu, keep spinlock for write
- update audit_alloc_local() to use ktime_get_coarse_real_ts64()
- add nft_log support
- add call from do_exit() in audit_free() to remove contid from netns
- relegate AUDIT_CONTAINER ref= field (was op=) to debug patch
v4
- preface set with ghak81:"collect audit task parameters"
- add shallyn and sgrubb acks
- rename feature bitmap macro
- rename cid_valid() to audit_contid_valid()
- rename AUDIT_CONTAINER_ID to AUDIT_CONTAINER_OP
- delete audit_get_contid_list() from headers
- move work into inner if, delete "found"
- change netns contid list function names
- move exports for audit_log_contid audit_alloc_local audit_free_context to non-syscall patch
- list contids CSV
- pass in gfp flags to audit_alloc_local() (fix audit_alloc_context callers)
- use "local" in lieu of abusing in_syscall for auditsc_get_stamp()
- read_lock(&tasklist_lock) around children and thread check
- task_lock(tsk) should be taken before first check of tsk->audit
- add spin lock to contid list in aunet
- restrict /proc read to CAP_AUDIT_CONTROL
- remove set again prohibition and inherited flag
- delete contidion spelling fix from patchset, send to netdev/linux-wireless
v3
- switched from containerid in task_struct to audit_task_info (depends on ghak81)
- drop INVALID_CID in favour of only AUDIT_CID_UNSET
- check for !audit_task_info, throw -ENOPROTOOPT on set
- changed -EPERM to -EEXIST for parent check
- return AUDIT_CID_UNSET if !audit_enabled
- squash child/thread check patch into AUDIT_CONTAINER_ID patch
- changed -EPERM to -EBUSY for child check
- separate child and thread checks, use -EALREADY for latter
- move addition of op= from ptrace/signal patch to AUDIT_CONTAINER patch
- fix && to || bashism in ptrace/signal patch
- uninline and export function for audit_free_context()
- drop CONFIG_CHANGE, FEATURE_CHANGE, ANOM_ABEND, ANOM_SECCOMP patches
- move audit_enabled check (xt_AUDIT)
- switched from containerid list in struct net to net_generic's struct audit_net
- move containerid list iteration into audit (xt_AUDIT)
- create function to move namespace switch into audit
- switched /proc/PID/ entry from containerid to audit_containerid
- call kzalloc with GFP_ATOMIC on in_atomic() in audit_alloc_context()
- call kzalloc with GFP_ATOMIC on in_atomic() in audit_log_container_info()
- use xt_net(par) instead of sock_net(skb->sk) to get net
- switched record and field names: initial CONTAINER_ID, aux CONTAINER, field CONTID
- allow to set own contid
- open code audit_set_containerid
- add contid inherited flag
- ccontainerid and pcontainerid eliminated due to inherited flag
- change name of container list funcitons
- rename containerid to contid
- convert initial container record to syscall aux
- fix spelling mistake of contidion in net/rfkill/core.c to avoid contid name collision
v2
- add check for children and threads
- add network namespace container identifier list
- add NETFILTER_PKT audit container identifier logging
- patch description and documentation clean-up and example
- reap unused ppid
Richard Guy Briggs (13):
audit: collect audit task parameters
audit: add container id
audit: read container ID of a process
audit: log drop of contid on exit of last task
audit: log container info of syscalls
audit: add contid support for signalling the audit daemon
audit: add support for non-syscall auxiliary records
audit: add containerid support for user records
audit: add containerid filtering
audit: add support for containerid to network namespaces
audit: contid check descendancy and nesting
audit: track container nesting
audit: add capcontid to set contid outside init_user_ns
fs/proc/base.c | 112 +++++++-
include/linux/audit.h | 135 +++++++++-
include/linux/sched.h | 10 +-
include/uapi/linux/audit.h | 10 +-
init/init_task.c | 3 +-
init/main.c | 2 +
kernel/audit.c | 621 +++++++++++++++++++++++++++++++++++++++++++-
kernel/audit.h | 23 ++
kernel/auditfilter.c | 61 +++++
kernel/auditsc.c | 110 ++++++--
kernel/fork.c | 1 -
kernel/nsproxy.c | 4 +
kernel/sched/core.c | 33 +++
net/netfilter/nft_log.c | 11 +-
net/netfilter/xt_AUDIT.c | 11 +-
security/selinux/nlmsgtab.c | 1 +
security/yama/yama_lsm.c | 33 ---
17 files changed, 1085 insertions(+), 96 deletions(-)
--
1.8.3.1
4 years, 1 month
[PATCH ghak120 V5] audit: trigger accompanying records when no rules present
by Richard Guy Briggs
When there are no audit rules registered, mandatory records (config,
etc.) are missing their accompanying records (syscall, proctitle, etc.).
This is due to audit context dummy set on syscall entry based on absence
of rules that signals that no other records are to be printed. Clear the dummy
bit if any record is generated, open coding this in audit_log_start().
The proctitle context and dummy checks are pointless since the
proctitle record will not be printed if no syscall records are printed.
The fds array is reset to -1 after the first syscall to indicate it
isn't valid any more, but was never set to -1 when the context was
allocated to indicate it wasn't yet valid.
Check ctx->pwd in audit_log_name().
The audit_inode* functions can be called without going through
getname_flags() or getname_kernel() that sets audit_names and cwd, so
set the cwd in audit_alloc_name() if it has not already been done so due to
audit_names being valid and purge all other audit_getcwd() calls.
Revert the LSM dump_common_audit_data() LSM_AUDIT_DATA_* cases from the
ghak96 patch since they are no longer necessary due to cwd coverage in
audit_alloc_name().
Thanks to bauen1 <j2468h(a)googlemail.com> for reporting LSM situations in
which context->cwd is not valid, inadvertantly fixed by the ghak96 patch.
Please see upstream github issue
https://github.com/linux-audit/audit-kernel/issues/120
This is also related to upstream github issue
https://github.com/linux-audit/audit-kernel/issues/96
Signed-off-by: Richard Guy Briggs <rgb(a)redhat.com>
---
Chagelog:
v5:
- open code audit_clear_dummy() in audit_log_start()
- fix check for ctx->pwd in audit_log_name()
- open code _audit_getcwd() contents in audit_alloc_name()
- ditch all *audit_getcwd() calls
v4:
- resubmit after revert
v3:
- initialize fds[0] to -1
- init cwd for ghak96 LSM_AUDIT_DATA_NET:AF_UNIX case
- init cwd for audit_inode{,_child}
v2:
- unconditionally clear dummy
- create audit_clear_dummy accessor function
- remove proctitle context and dummy checks
include/linux/audit.h | 8 --------
kernel/audit.c | 3 +++
kernel/auditsc.c | 27 +++++++--------------------
security/lsm_audit.c | 5 -----
4 files changed, 10 insertions(+), 33 deletions(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index b3d859831a31..82b7c1116a85 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -292,7 +292,6 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
extern void __audit_syscall_exit(int ret_success, long ret_value);
extern struct filename *__audit_reusename(const __user char *uptr);
extern void __audit_getname(struct filename *name);
-extern void __audit_getcwd(void);
extern void __audit_inode(struct filename *name, const struct dentry *dentry,
unsigned int flags);
extern void __audit_file(const struct file *);
@@ -351,11 +350,6 @@ static inline void audit_getname(struct filename *name)
if (unlikely(!audit_dummy_context()))
__audit_getname(name);
}
-static inline void audit_getcwd(void)
-{
- if (unlikely(audit_context()))
- __audit_getcwd();
-}
static inline void audit_inode(struct filename *name,
const struct dentry *dentry,
unsigned int aflags) {
@@ -584,8 +578,6 @@ static inline struct filename *audit_reusename(const __user char *name)
}
static inline void audit_getname(struct filename *name)
{ }
-static inline void audit_getcwd(void)
-{ }
static inline void audit_inode(struct filename *name,
const struct dentry *dentry,
unsigned int aflags)
diff --git a/kernel/audit.c b/kernel/audit.c
index 68cee3bc8cfe..dd9d22ba4fd2 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1865,6 +1865,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
}
audit_get_stamp(ab->ctx, &t, &serial);
+ /* cancel dummy context to enable supporting records */
+ if (ctx)
+ ctx->dummy = 0;
audit_log_format(ab, "audit(%llu.%03lu:%u): ",
(unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 8dba8f0983b5..183d79cc2e12 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -929,6 +929,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
INIT_LIST_HEAD(&context->killed_trees);
INIT_LIST_HEAD(&context->names_list);
+ context->fds[0] = -1;
return context;
}
@@ -1367,7 +1368,10 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
/* name was specified as a relative path and the
* directory component is the cwd
*/
- audit_log_d_path(ab, " name=", &context->pwd);
+ if (context->pwd.dentry && context->pwd.mnt)
+ audit_log_d_path(ab, " name=", &context->pwd);
+ else
+ audit_log_format(ab, " name=(null)");
break;
default:
/* log the name's directory component */
@@ -1435,9 +1439,6 @@ static void audit_log_proctitle(void)
struct audit_context *context = audit_context();
struct audit_buffer *ab;
- if (!context || context->dummy)
- return;
-
ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE);
if (!ab)
return; /* audit_panic or being filtered */
@@ -1866,6 +1867,8 @@ static struct audit_names *audit_alloc_name(struct audit_context *context,
list_add_tail(&aname->list, &context->names_list);
context->name_count++;
+ if (!context->pwd.dentry)
+ get_fs_pwd(current->fs, &context->pwd);
return aname;
}
@@ -1894,20 +1897,6 @@ __audit_reusename(const __user char *uptr)
return NULL;
}
-inline void _audit_getcwd(struct audit_context *context)
-{
- if (!context->pwd.dentry)
- get_fs_pwd(current->fs, &context->pwd);
-}
-
-void __audit_getcwd(void)
-{
- struct audit_context *context = audit_context();
-
- if (context->in_syscall)
- _audit_getcwd(context);
-}
-
/**
* __audit_getname - add a name to the list
* @name: name to add
@@ -1931,8 +1920,6 @@ void __audit_getname(struct filename *name)
n->name_len = AUDIT_NAME_FULL;
name->aname = n;
name->refcnt++;
-
- _audit_getcwd(context);
}
static inline int audit_copy_fcaps(struct audit_names *name,
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 53d0d183db8f..221370794d14 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -241,7 +241,6 @@ static void dump_common_audit_data(struct audit_buffer *ab,
audit_log_untrustedstring(ab, inode->i_sb->s_id);
audit_log_format(ab, " ino=%lu", inode->i_ino);
}
- audit_getcwd();
break;
}
case LSM_AUDIT_DATA_FILE: {
@@ -255,7 +254,6 @@ static void dump_common_audit_data(struct audit_buffer *ab,
audit_log_untrustedstring(ab, inode->i_sb->s_id);
audit_log_format(ab, " ino=%lu", inode->i_ino);
}
- audit_getcwd();
break;
}
case LSM_AUDIT_DATA_IOCTL_OP: {
@@ -271,7 +269,6 @@ static void dump_common_audit_data(struct audit_buffer *ab,
}
audit_log_format(ab, " ioctlcmd=0x%hx", a->u.op->cmd);
- audit_getcwd();
break;
}
case LSM_AUDIT_DATA_DENTRY: {
@@ -286,7 +283,6 @@ static void dump_common_audit_data(struct audit_buffer *ab,
audit_log_untrustedstring(ab, inode->i_sb->s_id);
audit_log_format(ab, " ino=%lu", inode->i_ino);
}
- audit_getcwd();
break;
}
case LSM_AUDIT_DATA_INODE: {
@@ -304,7 +300,6 @@ static void dump_common_audit_data(struct audit_buffer *ab,
audit_log_format(ab, " dev=");
audit_log_untrustedstring(ab, inode->i_sb->s_id);
audit_log_format(ab, " ino=%lu", inode->i_ino);
- audit_getcwd();
break;
}
case LSM_AUDIT_DATA_TASK: {
--
2.18.4
4 years, 1 month
[PATCH v3 51/56] audit: fix a kernel-doc markup
by Mauro Carvalho Chehab
typo:
kauditd_print_skb -> kauditd_printk_skb
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
---
kernel/audit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 68cee3bc8cfe..0be42cac086b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -523,7 +523,7 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net)
}
/**
- * kauditd_print_skb - Print the audit record to the ring buffer
+ * kauditd_printk_skb - Print the audit record to the ring buffer
* @skb: audit record
*
* Whatever the reason, this packet may not make it to the auditd connection
--
2.26.2
4 years, 1 month
How to monitor only when a binary is launched
by MAUPERTUIS, PHILIPPE
Hello,
Aide or clamscan are analyzing all the files on the system thus generating a lot of messages
They are binaries that I can trust so I can exclude their activity from auditd.
I know that I can do this with -a never,exit -F arch=b64 -F exe=/sbin/aide
However I would like to have an entry for the execution of the binary itself with the parameters used.
I would like to turn off only the report of the syscall it issued .
Is there a general way to achieve that : record the launch of a binary but not its actions.
Thanks
Philippe
Worldline and equensWorldline are registered trademarks and trading names owned by the Worldline Group.
This e-mail and any documents attached are confidential and intended solely for the addressee. If you receive this e-mail in error, you are not authorized to copy, disclose, use or retain it. Please notify the sender immediately and delete this e-mail from your systems. As e-mails may be intercepted, amended or lost, they are not secure. Worldline and its subsidiaries therefore cannot accept liability for any errors in their content. Although Worldline endeavours to maintain a virus-free network, we do not warrant that this e-mail is virus-free and cannot accept liability for any damages resulting from any transmitted virus if any. The risks are deemed to be accepted by anyone who communicates with Worldline or its subsidiaries by e-mail.
4 years, 2 months
Auditd is not turning off the system on (RHEL7.+)
by Fabio Sbano
My /etc/audit/auditd.conf is configured as below but it is not turning off the system
## This file controls the configuration of the audit daemon#
local_events = yeswrite_logs = yeslog_file = /var/log/audit/audit.loglog_group = rootlog_format = RAWflush = INCREMENTAL_ASYNCfreq = 50max_log_file = 8num_logs = 5priority_boost = 4disp_qos = lossydispatcher = /sbin/audispdname_format = NONE##name = mydomainmax_log_file_action = ROTATEspace_left = 75space_left_action = SYSLOGverify_email = yesaction_mail_acct = rootadmin_space_left = 50admin_space_left_action = haltdisk_full_action = SUSPENDdisk_error_action = SUSPENDuse_libwrap = yes##tcp_listen_port = 60tcp_listen_queue = 5tcp_max_per_addr = 1##tcp_client_ports = 1024-65535tcp_client_max_idle = 0enable_krb5 = nokrb5_principal = auditd##krb5_key_file = /etc/audit/audit.keydistribute_network = no
Best Regards,Fabio Sbano
4 years, 2 months