July 2020 - Linux-audit - Linux-Audit List Archives

[PATCH ghak124 v3] audit: log nftables configuration change events

by Richard Guy Briggs

iptables, ip6tables, arptables and ebtables table registration, replacement and unregistration configuration events are logged for the native (legacy) iptables setsockopt api, but not for the nftables netlink api which is used by the nft-variant of iptables in addition to nftables itself. Add calls to log the configuration actions in the nftables netlink api. This uses the same NETFILTER_CFG record format but overloads the table field. type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.878:162) : table=?:0;?:0 family=unspecified entries=2 op=nft_register_gen pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld ... type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.878:162) : table=firewalld:1;?:0 family=inet entries=0 op=nft_register_table pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld ... type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;filter_FORWARD:85 family=inet entries=8 op=nft_register_chain pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld ... type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;filter_FORWARD:85 family=inet entries=101 op=nft_register_rule pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld ... type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;__set0:87 family=inet entries=87 op=nft_register_setelem pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld ... 
type=NETFILTER_CFG msg=audit(2020-05-28 17:46:41.911:163) : table=firewalld:1;__set0:87 family=inet entries=0 op=nft_register_set pid=396 subj=system_u:system_r:firewalld_t:s0 comm=firewalld For further information please see issue https://github.com/linux-audit/audit-kernel/issues/124 Signed-off-by: Richard Guy Briggs <rgb(a)redhat.com> --- Changelog: v3: - inline message type rather than table v2: - differentiate between xtables and nftables - add set, setelem, obj, flowtable, gen - use nentries field as appropriate per type - overload the "tables" field with table handle and chain/set/flowtable include/linux/audit.h | 18 ++++++++ kernel/auditsc.c | 24 ++++++++-- net/netfilter/nf_tables_api.c | 103 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/include/linux/audit.h b/include/linux/audit.h index 3fcd9ee49734..604ede630580 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/ptrace.h> #include <uapi/linux/audit.h> +#include <uapi/linux/netfilter/nf_tables.h> #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -98,6 +99,23 @@ enum audit_nfcfgop { AUDIT_XT_OP_REGISTER, AUDIT_XT_OP_REPLACE, AUDIT_XT_OP_UNREGISTER, + AUDIT_NFT_OP_TABLE_REGISTER, + AUDIT_NFT_OP_TABLE_UNREGISTER, + AUDIT_NFT_OP_CHAIN_REGISTER, + AUDIT_NFT_OP_CHAIN_UNREGISTER, + AUDIT_NFT_OP_RULE_REGISTER, + AUDIT_NFT_OP_RULE_UNREGISTER, + AUDIT_NFT_OP_SET_REGISTER, + AUDIT_NFT_OP_SET_UNREGISTER, + AUDIT_NFT_OP_SETELEM_REGISTER, + AUDIT_NFT_OP_SETELEM_UNREGISTER, + AUDIT_NFT_OP_GEN_REGISTER, + AUDIT_NFT_OP_OBJ_REGISTER, + AUDIT_NFT_OP_OBJ_UNREGISTER, + AUDIT_NFT_OP_OBJ_RESET, + AUDIT_NFT_OP_FLOWTABLE_REGISTER, + AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_INVALID, }; extern int is_audit_feature_set(int which); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 468a23390457..3a9100e95fda 100644 --- a/kernel/auditsc.c +++ 
b/kernel/auditsc.c @@ -75,6 +75,7 @@ #include <linux/uaccess.h> #include <linux/fsnotify_backend.h> #include <uapi/linux/limits.h> +#include <uapi/linux/netfilter/nf_tables.h> #include "audit.h" @@ -136,9 +137,26 @@ struct audit_nfcfgop_tab { }; static const struct audit_nfcfgop_tab audit_nfcfgs[] = { - { AUDIT_XT_OP_REGISTER, "register" }, - { AUDIT_XT_OP_REPLACE, "replace" }, - { AUDIT_XT_OP_UNREGISTER, "unregister" }, + { AUDIT_XT_OP_REGISTER, "xt_register" }, + { AUDIT_XT_OP_REPLACE, "xt_replace" }, + { AUDIT_XT_OP_UNREGISTER, "xt_unregister" }, + { AUDIT_NFT_OP_TABLE_REGISTER, "nft_register_table" }, + { AUDIT_NFT_OP_TABLE_UNREGISTER, "nft_unregister_table" }, + { AUDIT_NFT_OP_CHAIN_REGISTER, "nft_register_chain" }, + { AUDIT_NFT_OP_CHAIN_UNREGISTER, "nft_unregister_chain" }, + { AUDIT_NFT_OP_RULE_REGISTER, "nft_register_rule" }, + { AUDIT_NFT_OP_RULE_UNREGISTER, "nft_unregister_rule" }, + { AUDIT_NFT_OP_SET_REGISTER, "nft_register_set" }, + { AUDIT_NFT_OP_SET_UNREGISTER, "nft_unregister_set" }, + { AUDIT_NFT_OP_SETELEM_REGISTER, "nft_register_setelem" }, + { AUDIT_NFT_OP_SETELEM_UNREGISTER, "nft_unregister_setelem" }, + { AUDIT_NFT_OP_GEN_REGISTER, "nft_register_gen" }, + { AUDIT_NFT_OP_OBJ_REGISTER, "nft_register_obj" }, + { AUDIT_NFT_OP_OBJ_UNREGISTER, "nft_unregister_obj" }, + { AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" }, + { AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" }, + { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" }, + { AUDIT_NFT_OP_INVALID, "nft_invalid" }, }; static int audit_match_perm(struct audit_context *ctx, int mask) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3558e76e2733..b9e7440cc87d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -12,6 +12,7 @@ #include <linux/netlink.h> #include <linux/vmalloc.h> #include <linux/rhashtable.h> +#include <linux/audit.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include 
<linux/netfilter/nf_tables.h> @@ -693,6 +694,16 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0", + ctx->table->name, ctx->table->handle); + + audit_log_nfcfg(buf, + ctx->family, + ctx->table->use, + event == NFT_MSG_NEWTABLE ? + AUDIT_NFT_OP_TABLE_REGISTER : + AUDIT_NFT_OP_TABLE_UNREGISTER); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -1428,6 +1439,17 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + ctx->chain->name, ctx->chain->handle); + + audit_log_nfcfg(buf, + ctx->family, + ctx->chain->use, + event == NFT_MSG_NEWCHAIN ? + AUDIT_NFT_OP_CHAIN_REGISTER : + AUDIT_NFT_OP_CHAIN_UNREGISTER); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -2691,6 +2713,17 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + ctx->chain->name, ctx->chain->handle); + + audit_log_nfcfg(buf, + ctx->family, + rule->handle, + event == NFT_MSG_NEWRULE ? + AUDIT_NFT_OP_RULE_REGISTER : + AUDIT_NFT_OP_RULE_UNREGISTER); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -3693,6 +3726,17 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, struct sk_buff *skb; u32 portid = ctx->portid; int err; + char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + set->name, set->handle); + + audit_log_nfcfg(buf, + ctx->family, + set->field_count, + event == NFT_MSG_NEWSET ? 
+ AUDIT_NFT_OP_SET_REGISTER : + AUDIT_NFT_OP_SET_UNREGISTER); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -4809,6 +4853,17 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, u32 portid = ctx->portid; struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + set->name, set->handle); + + audit_log_nfcfg(buf, + ctx->family, + set->handle, + event == NFT_MSG_NEWSETELEM ? + AUDIT_NFT_OP_SETELEM_REGISTER : + AUDIT_NFT_OP_SETELEM_UNREGISTER); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; @@ -5890,6 +5945,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) obj->ops->type->type != filter->type) goto cont; + if (reset) { + char *buf = kasprintf(GFP_KERNEL, + "%s:%llu;?:0", + table->name, + table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + AUDIT_NFT_OP_OBJ_RESET); + kfree(buf); + } + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, @@ -6000,6 +6068,17 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) reset = true; + if (reset) { + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0", + table->name, table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + AUDIT_NFT_OP_OBJ_RESET); + kfree(buf); + } + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); @@ -6075,6 +6154,16 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0", + table->name, table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + event == NFT_MSG_NEWOBJ ? 
+ AUDIT_NFT_OP_OBJ_REGISTER : + AUDIT_NFT_OP_OBJ_UNREGISTER); + kfree(buf); if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -6701,6 +6790,17 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + flowtable->table->name, flowtable->table->handle, + flowtable->name, flowtable->handle); + + audit_log_nfcfg(buf, + ctx->family, + flowtable->hooknum, + event == NFT_MSG_NEWFLOWTABLE ? + AUDIT_NFT_OP_FLOWTABLE_REGISTER : + AUDIT_NFT_OP_FLOWTABLE_UNREGISTER); + kfree(buf); if (ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -6822,6 +6922,9 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, struct sk_buff *skb2; int err; + audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq, + AUDIT_NFT_OP_GEN_REGISTER); + if (nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; -- 1.8.3.1

4 years, 4 months

6
26
0 / 0

[PATCH v2] audit: report audit wait metric in audit status reply

by Max Englander

In environments where the preservation of audit events and predictable usage of system memory are prioritized, admins may use a combination of --backlog_wait_time and -b options at the risk of degraded performance resulting from backlog waiting. In some cases, this risk may be preferred to lost events or unbounded memory usage. Ideally, this risk can be mitigated by making adjustments when backlog waiting is detected. However, detection can be difficult using the currently available metrics. For example, an admin attempting to debug degraded performance may falsely believe a full backlog indicates backlog waiting. It may turn out the backlog frequently fills up but drains quickly. To make it easier to reliably track degraded performance to backlog waiting, this patch makes the following changes: Add a new field backlog_wait_sum to the audit status reply. Initialize this field to zero. Add to this field the total time spent by the current task on scheduled timeouts while the backlog limit is exceeded. Tested on Ubuntu 18.04 using complementary changes to the audit userspace: https://github.com/linux-audit/audit-userspace/pull/134. Signed-off-by: Max Englander <max.englander(a)gmail.com> --- Patch changelogs between v1 and v2: - Instead of printing a warning when backlog waiting occurs, add duration of backlog waiting to cumulative sum, and report this sum in audit status reply. 
include/uapi/linux/audit.h | 7 ++++++- kernel/audit.c | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index a534d71e689a..ea0cc364beca 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -340,6 +340,7 @@ enum { #define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 #define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 #define AUDIT_STATUS_LOST 0x0040 +#define AUDIT_STATUS_BACKLOG_WAIT_SUM 0x0080 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 @@ -348,6 +349,7 @@ enum { #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010 #define AUDIT_FEATURE_BITMAP_LOST_RESET 0x00000020 #define AUDIT_FEATURE_BITMAP_FILTER_FS 0x00000040 +#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM 0x00000080 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \ AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \ @@ -355,12 +357,14 @@ enum { AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \ AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \ AUDIT_FEATURE_BITMAP_LOST_RESET | \ - AUDIT_FEATURE_BITMAP_FILTER_FS) + AUDIT_FEATURE_BITMAP_FILTER_FS | \ + AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM) /* deprecated: AUDIT_VERSION_* */ #define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL #define AUDIT_VERSION_BACKLOG_LIMIT AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT #define AUDIT_VERSION_BACKLOG_WAIT_TIME AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME +#define AUDIT_VERSION_BACKLOG_WAIT_SUM AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM /* Failure-to-log actions */ #define AUDIT_FAIL_SILENT 0 @@ -466,6 +470,7 @@ struct audit_status { __u32 feature_bitmap; /* bitmap of kernel audit features */ }; __u32 backlog_wait_time;/* message queue wait timeout */ + __u32 backlog_wait_sum;/* time spent waiting while message limit exceeded */ }; struct audit_features { diff --git a/kernel/audit.c b/kernel/audit.c index 87f31bf1f0a0..301ea4f3d750 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -136,6 
+136,11 @@ u32 audit_sig_sid = 0; */ static atomic_t audit_lost = ATOMIC_INIT(0); +/* Monotonically increasing sum of time the kernel has spent + * waiting while the backlog limit is exceeded. + */ +static atomic_t audit_backlog_wait_sum = ATOMIC_INIT(0); + /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -1204,6 +1209,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.backlog_wait_time = audit_backlog_wait_time; + s.backlog_wait_sum = atomic_read(&audit_backlog_wait_sum); audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -1794,6 +1800,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, return NULL; } } + + if (stime != audit_backlog_wait_time) + atomic_add(audit_backlog_wait_time - stime, &audit_backlog_wait_sum); } ab = audit_buffer_alloc(ctx, gfp_mask, type); -- 2.17.1

4 years, 7 months

6
30
0 / 0

[PATCH ghak90 V9 00/13] audit: implement container identifier

by Richard Guy Briggs

Implement kernel audit container identifier. This patchset is an eighth based on the proposal document (V4) posted: https://www.redhat.com/archives/linux-audit/2019-September/msg00052.html The first patch was the last patch from ghak81 that was absorbed into this patchset since its primary justification is the rest of this patchset. The second patch implements the proc fs write to set the audit container identifier of a process, emitting an AUDIT_CONTAINER_OP record to announce the registration of that audit container identifier on that process. This patch requires userspace support for record acceptance and proper type display. This patch now includes the conversion over from a simple u64 to a list member that includes owner information to check for descendancy, allow process injection into a container and prevent id reuse by other orchestrators. The third implements reading the audit container identifier from the proc filesystem for debugging. This patch wasn't planned for upstream inclusion but is starting to become more likely. The fourth logs the drop of an audit container identifier once all tasks using that audit container identifier have exited. The 5th implements the auxiliary record AUDIT_CONTAINER_ID if an audit container identifier is associated with an event. This patch requires userspace support for proper type display. The 6th adds audit daemon signalling provenance through audit_sig_info2. The 7th creates a local audit context to be able to bind a standalone record with a locally created auxiliary record. The 8th patch adds audit container identifier records to the user standalone records. The 9th adds audit container identifier filtering to the exit, exclude and user lists. This patch adds the AUDIT_CONTID field and requires auditctl userspace support for the --contid option. 
The 10th adds network namespace audit container identifier labelling based on member tasks' audit container identifier labels which supports standalone netfilter records that don't have a task context and lists each container to which that net namespace belongs. The 11th checks that the target is a descendant for nesting and refactors to avoid a duplicate of the copied function. The 12th adds tracking and reporting for container nesting. This enables kernel filtering and userspace searches of nested audit container identifiers. The 13th adds a mechanism to allow a process to be designated as a container orchestrator/engine in non-init user namespaces. Example: Set an audit container identifier of 123456 to the "sleep" task: sleep 2& child=$! echo 123456 > /proc/$child/audit_containerid; echo $? ausearch -ts recent -m container_op echo child:$child contid:$( cat /proc/$child/audit_containerid) This should produce a record such as: type=CONTAINER_OP msg=audit(2018-06-06 12:39:29.636:26949) : op=set opid=2209 contid=123456 old-contid=18446744073709551615 Example: Set a filter on an audit container identifier 123459 on /tmp/tmpcontainerid: contid=123459 key=tmpcontainerid auditctl -a exit,always -F dir=/tmp -F perm=wa -F contid=$contid -F key=$key perl -e "sleep 1; open(my \$tmpfile, '>', \"/tmp/$key\"); close(\$tmpfile);" & child=$! 
echo $contid > /proc/$child/audit_containerid sleep 2 ausearch -i -ts recent -k $key auditctl -d exit,always -F dir=/tmp -F perm=wa -F contid=$contid -F key=$key rm -f /tmp/$key This should produce an event such as: type=CONTAINER_ID msg=audit(2018-06-06 12:46:31.707:26953) : contid=123459 type=PROCTITLE msg=audit(2018-06-06 12:46:31.707:26953) : proctitle=perl -e sleep 1; open(my $tmpfile, '>', "/tmp/tmpcontainerid"); close($tmpfile); type=PATH msg=audit(2018-06-06 12:46:31.707:26953) : item=1 name=/tmp/tmpcontainerid inode=25656 dev=00:26 mode=file,644 ouid=root ogid=root rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE cap_fp=none cap_fi=none cap_fe=0 cap_fver=0 type=PATH msg=audit(2018-06-06 12:46:31.707:26953) : item=0 name=/tmp/ inode=8985 dev=00:26 mode=dir,sticky,777 ouid=root ogid=root rdev=00:00 obj=system_u:object_r:tmp_t:s0 nametype=PARENT cap_fp=none cap_fi=none cap_fe=0 cap_fver=0 type=CWD msg=audit(2018-06-06 12:46:31.707:26953) : cwd=/root type=SYSCALL msg=audit(2018-06-06 12:46:31.707:26953) : arch=x86_64 syscall=openat success=yes exit=3 a0=0xffffffffffffff9c a1=0x5621f2b81900 a2=O_WRONLY|O_CREAT|O_TRUNC a3=0x1b6 items=2 ppid=628 pid=2232 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=ttyS0 ses=1 comm=perl exe=/usr/bin/perl subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=tmpcontainerid Example: Test multiple containers on one netns: sleep 5 & child1=$! containerid1=123451 echo $containerid1 > /proc/$child1/audit_containerid sleep 5 & child2=$! 
containerid2=123452 echo $containerid2 > /proc/$child2/audit_containerid iptables -I INPUT -i lo -p icmp --icmp-type echo-request -j AUDIT --type accept iptables -I INPUT -t mangle -i lo -p icmp --icmp-type echo-request -j MARK --set-mark 0x12345555 sleep 1; bash -c "ping -q -c 1 127.0.0.1 >/dev/null 2>&1" sleep 1; ausearch -i -m NETFILTER_PKT -ts boot|grep mark=0x12345555 ausearch -i -m NETFILTER_PKT -ts boot|grep contid=|grep $containerid1|grep $containerid2 This would produce an event such as: type=NETFILTER_PKT msg=audit(03/15/2019 14:16:13.369:244) : mark=0x12345555 saddr=127.0.0.1 daddr=127.0.0.1 proto=icmp type=CONTAINER_ID msg=audit(03/15/2019 14:16:13.369:244) : contid=123452,123451 Includes the last patch of https://github.com/linux-audit/audit-kernel/issues/81 Please see the github audit kernel issue for the main feature: https://github.com/linux-audit/audit-kernel/issues/90 and the kernel filter code: https://github.com/linux-audit/audit-kernel/issues/91 and the network support: https://github.com/linux-audit/audit-kernel/issues/92 Please see the github audit userspace issue for supporting record types: https://github.com/linux-audit/audit-userspace/issues/51 and filter code: https://github.com/linux-audit/audit-userspace/issues/40 Please see the github audit testsuite issue for the test case: https://github.com/linux-audit/audit-testsuite/issues/64 https://github.com/rgbriggs/audit-testsuite/tree/ghat64-contid https://github.com/linux-audit/audit-testsuite/pull/91 Please see the github audit wiki for the feature overview: https://github.com/linux-audit/audit-kernel/wiki/RFE-Audit-Container-ID The code is also posted at: git://toccata2.tricolour.ca/linux-2.6-rgb.git ghak90-audit-containerID.v9 Changelog: v9 - rebase on v5.8-rc1 - fix whitespace and oversize lines where practicable - remove harmless duplicate S_IRUSR in capcontid - return -EBUSY for both threading and children (drop -EALREADY) - return -EEXIST if already set and not nesting (drop 
-ECHILD) - fix unbalanced brace and remove elseif ladder - drop check for same contid set again as redundant (drop -EADDRINUSE) - get reference to contobj's parent taskstruct - protect all contid list updates with audit_contobj_list_lock - protect refcounts with rcu read lock - convert _audit_contobj to _audit_contobj_get, which calls _audit_contobj_hold - convert audit_log_container_id() and audit_log_contid() from u64 to contobj, simplifying - issue death certificate on contid after exit of last task - keep contobj ref to block reuse with -ESHUTDOWN until auditd exit or signal info - report all contids nested - rework sig_info2 format to accommodate contid list - fix zero-length array in include/linux/audit.h struct audit_sig_info2 data[] - found bug in audit_alloc_local, don't check audit_ever_enabled, since all callers check audit_enabled - remove warning at declaration of audit_sig_cid of reuse since reuse is now blocked - report descendancy checking errcodes under -EXDEV (drop -EBADSLT) - add missed check, replace audit_contid_isowner with audit_contid_isnesting - limit calls to audit_log_format() with if(iter->parent) ... 
- list only one contid in contid, nested in old-contid to avoid duplication - switch to comma delimiter, caret modifier in nested contid list - special case -1 for AUDIT_CID_UNSET printing - drop contid depth limit and netns contid limit patches - enforce capcontid policy on contid write and read - squash conversion to contobj into contid intro patch v8 - rebase on v5.5-rc1 audit/next - remove subject attrs in CONTAINER_OP record - group audit_contid_list_lock with audit_contid_hash - in audit_{set,log}_contid(), break out of loop after finding target - use target var to size kmalloc - rework audit_cont_owner() to bool audit_contid_isowner() and move to where used - create static void audit_cont_hold(struct audit_contobj *cont) { refcount_inc(&cont->refcount); } - rename audit_cont{,_*} refs to audit_contobj{,_*} - prefix special local functions with _ [audit_contobj*()] - protect contid list traversals with rcu_read_lock() and updates with audit_contid_list_lock - protect real_parent in audit_contid_depth() with rcu_dereference - give new contid field nesting format in patch description - squash task_is_descendant() - squash support for NETFILTER_PKT into network namespaces - limit nesting depth based on record length overflow, bandwidth and storage - implement control for audit container identifier nesting depth limit - make room for audit_bpf patches (bump CONTAINER_ID to 1335) - squash proc interface into capcontid - remove netlink access to loginuid/sessionid/contid/capcontid - delete 32k contid limit patch - document potential overlap between signal delivery and contid reuse - document audit_contobj_list_lock coverage - document disappearing orch task injection limitation - limit the number of containers that can be associated with a network namespace - implement control for audit container identifier netns count limit v7 - remove BUG() in audit_comparator64() - rebase on v5.2-rc1 audit/next - resolve merge conflict with ghak111 (signal_info regardless syscall) 
- resolve merge conflict with ghak73 (audit_field_valid) - resolve merge conflict with ghak64 (saddr_fam filter) - resolve merge conflict with ghak10 (ntp audit) change AUDIT_CONTAINER_ID from 1332 to 1334 - rebase on v5.3-rc1 audit/next - track container owner - only permit setting contid of descendants for nesting - track drop of contid and permit reuse - track and report container nesting - permit filtering on any nested contid - set/get contid and loginuid/sessionid via netlink - implement capcontid to enable orchestrators in non-init user namespaces - limit number of containers - limit depth of container nesting v6 - change TMPBUFLEN from 11 to 21 to cover the decimal value of contid u64 (nhorman) - fix bug overwriting ctx in struct audit_sig_info, move cid above ctx[0] (nhorman) - fix bug skipping remaining fields and not advancing bufp when copying out contid in audit_krule_to_data (omosnacec) - add acks, tidy commit descriptions, other formatting fixes (checkpatch wrong on audit_log_lost) - cast ull for u64 prints - target_cid tracking was moved from the ptrace/signal patch to container_op - target ptrace and signal records were moved from the ptrace/signal patch to container_id - auditd signaller tracking was moved to a new AUDIT_SIGNAL_INFO2 request and record - ditch unnecessary list_empty() checks - check for null net and aunet in audit_netns_contid_add() - swap CONTAINER_OP contid/old-contid order to ease parsing v5 - address loginuid and sessionid syscall scope in ghak104 - address audit_context in CONFIG_AUDIT vs CONFIG_AUDITSYSCALL in ghak105 - remove tty patch, addressed in ghak106 - rebase on audit/next v5.0-rc1 w/ghak59/ghak104/ghak103/ghak100/ghak107/ghak105/ghak106/ghak105sup - update CONTAINER_ID to CONTAINER_OP in patch description - move audit_context in audit_task_info to CONFIG_AUDITSYSCALL - move audit_alloc() and audit_free() out of CONFIG_AUDITSYSCALL and into CONFIG_AUDIT and create audit_{alloc,free}_syscall - use plain 
kmem_cache_alloc() rather than kmem_cache_zalloc() in audit_alloc() - fix audit_get_contid() declaration type error - move audit_set_contid() from auditsc.c to audit.c - audit_log_contid() returns void - audit_log_contid() handed contid rather than tsk - switch from AUDIT_CONTAINER to AUDIT_CONTAINER_ID for aux record - move audit_log_contid(tsk/contid) & audit_contid_set(tsk)/audit_contid_valid(contid) - switch from tsk to current - audit_alloc_local() calls audit_log_lost() on failure to allocate a context - add AUDIT_USER* non-syscall contid record - cosmetic cleanup double parens, goto out on err - ditch audit_get_ns_contid_list_lock(), fix aunet lock race - switch from all-cpu read spinlock to rcu, keep spinlock for write - update audit_alloc_local() to use ktime_get_coarse_real_ts64() - add nft_log support - add call from do_exit() in audit_free() to remove contid from netns - relegate AUDIT_CONTAINER ref= field (was op=) to debug patch v4 - preface set with ghak81:"collect audit task parameters" - add shallyn and sgrubb acks - rename feature bitmap macro - rename cid_valid() to audit_contid_valid() - rename AUDIT_CONTAINER_ID to AUDIT_CONTAINER_OP - delete audit_get_contid_list() from headers - move work into inner if, delete "found" - change netns contid list function names - move exports for audit_log_contid audit_alloc_local audit_free_context to non-syscall patch - list contids CSV - pass in gfp flags to audit_alloc_local() (fix audit_alloc_context callers) - use "local" in lieu of abusing in_syscall for auditsc_get_stamp() - read_lock(&tasklist_lock) around children and thread check - task_lock(tsk) should be taken before first check of tsk->audit - add spin lock to contid list in aunet - restrict /proc read to CAP_AUDIT_CONTROL - remove set again prohibition and inherited flag - delete contidion spelling fix from patchset, send to netdev/linux-wireless v3 - switched from containerid in task_struct to audit_task_info (depends on ghak81) - drop 
INVALID_CID in favour of only AUDIT_CID_UNSET - check for !audit_task_info, throw -ENOPROTOOPT on set - changed -EPERM to -EEXIST for parent check - return AUDIT_CID_UNSET if !audit_enabled - squash child/thread check patch into AUDIT_CONTAINER_ID patch - changed -EPERM to -EBUSY for child check - separate child and thread checks, use -EALREADY for latter - move addition of op= from ptrace/signal patch to AUDIT_CONTAINER patch - fix && to || bashism in ptrace/signal patch - uninline and export function for audit_free_context() - drop CONFIG_CHANGE, FEATURE_CHANGE, ANOM_ABEND, ANOM_SECCOMP patches - move audit_enabled check (xt_AUDIT) - switched from containerid list in struct net to net_generic's struct audit_net - move containerid list iteration into audit (xt_AUDIT) - create function to move namespace switch into audit - switched /proc/PID/ entry from containerid to audit_containerid - call kzalloc with GFP_ATOMIC on in_atomic() in audit_alloc_context() - call kzalloc with GFP_ATOMIC on in_atomic() in audit_log_container_info() - use xt_net(par) instead of sock_net(skb->sk) to get net - switched record and field names: initial CONTAINER_ID, aux CONTAINER, field CONTID - allow to set own contid - open code audit_set_containerid - add contid inherited flag - ccontainerid and pcontainerid eliminated due to inherited flag - change name of container list functions - rename containerid to contid - convert initial container record to syscall aux - fix spelling mistake of contidion in net/rfkill/core.c to avoid contid name collision v2 - add check for children and threads - add network namespace container identifier list - add NETFILTER_PKT audit container identifier logging - patch description and documentation clean-up and example - reap unused ppid Richard Guy Briggs (13): audit: collect audit task parameters audit: add container id audit: read container ID of a process audit: log drop of contid on exit of last task audit: log container info of syscalls audit: add 
contid support for signalling the audit daemon audit: add support for non-syscall auxiliary records audit: add containerid support for user records audit: add containerid filtering audit: add support for containerid to network namespaces audit: contid check descendancy and nesting audit: track container nesting audit: add capcontid to set contid outside init_user_ns fs/proc/base.c | 112 +++++++- include/linux/audit.h | 135 +++++++++- include/linux/sched.h | 10 +- include/uapi/linux/audit.h | 10 +- init/init_task.c | 3 +- init/main.c | 2 + kernel/audit.c | 621 +++++++++++++++++++++++++++++++++++++++++++- kernel/audit.h | 23 ++ kernel/auditfilter.c | 61 +++++ kernel/auditsc.c | 110 ++++++-- kernel/fork.c | 1 - kernel/nsproxy.c | 4 + kernel/sched/core.c | 33 +++ net/netfilter/nft_log.c | 11 +- net/netfilter/xt_AUDIT.c | 11 +- security/selinux/nlmsgtab.c | 1 + security/yama/yama_lsm.c | 33 --- 17 files changed, 1085 insertions(+), 96 deletions(-) -- 1.8.3.1

4 years, 8 months

3
50
0 / 0

[RFC PATCH v5 00/11] Integrity Policy Enforcement LSM (IPE)

by Deven Bowers

Overview: ------------------------------------ IPE is a Linux Security Module which allows for a configurable policy to enforce integrity requirements on the whole system. It attempts to solve the issue of Code Integrity: that any code being executed (or files being read), are identical to the version that was built by a trusted source. The type of system for which IPE is designed is an embedded device with a specific purpose (e.g. network firewall device in a data center), where all software and configuration is built and provisioned by the owner. Specifically, a system which leverages IPE is not intended for general purpose computing and does not utilize any software or configuration built by a third party. An ideal system to leverage IPE has both mutable and immutable components, however, all binary executable code is immutable. The scope of IPE is constrained to the OS. It is assumed that platform firmware verifies the kernel and optionally the root filesystem (e.g. via U-Boot verified boot). IPE then utilizes LSM hooks to enforce a flexible, kernel-resident integrity verification policy. IPE differs from other LSMs which provide integrity checking (for instance, IMA), as it has no dependency on the filesystem metadata itself. The attributes that IPE checks are deterministic properties that exist solely in the kernel. Additionally, IPE provides no additional mechanisms of verifying these files (e.g. IMA Signatures) - all of the attributes of verifying files are existing features within the kernel, such as dm-verity or fsverity. IPE provides a policy that allows owners of the system to easily specify integrity requirements and uses dm-verity signatures to simplify the authentication of allowed objects like authorized code and data. IPE supports two modes, permissive (similar to SELinux's permissive mode) and enforce. Permissive mode performs the same checks, and logs policy violations as enforce mode, but will not enforce the policy. 
This allows users to test policies before enforcing them. The default mode is enforce, and can be changed via the kernel commandline parameter `ipe.enforce=(0|1)`, or the securityfs node `/sys/kernel/security/ipe/enforce`. The ability to switch modes can be compiled out of the LSM via setting the config CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH to N. IPE additionally supports success auditing. When enabled, all events that pass IPE policy and are not blocked will emit an audit event. This is disabled by default, and can be enabled via the kernel commandline `ipe.success_audit=(0|1)` or the securityfs node `/sys/kernel/security/ipe/success_audit`. Policies can be staged at runtime through securityfs and activated through sysfs. Please see the Deploying Policies section of this cover letter for more information. The IPE LSM is compiled under CONFIG_SECURITY_IPE. Policy: ------------------------------------ IPE policy is designed to be both forward compatible and backwards compatible. There is one required line, at the top of the policy, indicating the policy name, and the policy version, for instance: policy_name="Ex Policy" policy_version=0.0.0 The policy version indicates the current version of the policy (NOT the policy syntax version). This is used to prevent roll-back of policy to potentially insecure previous versions of the policy. The next portion of IPE policy, are rules. Rules are formed by key=value pairs, known as properties. IPE rules require two properties: "action", which determines what IPE does when it encounters a match against the policy, and "op", which determines when that rule should be evaluated. Thus, a minimal rule is: op=EXECUTE action=ALLOW This example will allow any execution. Additional properties are used to restrict attributes about the files being evaluated. These properties are intended to be deterministic attributes that are resident in the kernel. 
Available properties for IPE are described in the properties section of this cover-letter, the repository available in Appendix A, and the kernel documentation page. Order does not matter for the rule's properties - they can be listed in any order, however it is encouraged to have the "op" property be first, and the "action" property be last, for readability. Additionally, rules are evaluated top-to-bottom. As a result, any revocation rules, or denies should be placed early in the file to ensure that these rules are evaluated before a rule with "action=ALLOW" is hit. Any unknown syntax in IPE policy will result in a fatal error when parsing the policy. User mode can interrogate the kernel to understand what properties are enabled, and their associated versions, through the securityfs node, $securityfs/ipe/property_config, which will return a string of form: key1=version1 key2=version2 . . . keyN=versionN User-mode should correlate these versions with the supported values identified in the documentation to determine whether a policy should be accepted by the system. Additionally, a DEFAULT operation must be set for all understood operations within IPE. For policies to remain completely forwards compatible, it is recommended that users add a "DEFAULT action=ALLOW" and override the defaults on a per-operation basis. For more information about the policy syntax, please see Appendix A or the kernel documentation page. Early Usermode Protection: -------------------------- IPE can be provided with a policy at startup to load and enforce. This is intended to be a minimal policy to get the system to a state where userland is set up and ready to receive commands, at which point a policy can be deployed via securityfs. This "boot policy" can be specified via the config, SECURITY_IPE_BOOT_POLICY, which accepts a path to a plain-text version of the IPE policy to apply. This policy will be compiled into the kernel. 
If not specified, IPE will be disabled until a policy is deployed and activated through the method above. Policy Examples: ------------------------------------ Allow all: policy_name="Allow All" policy_version=0.0.0 DEFAULT action=ALLOW Allow only initial superblock: policy_name="Allow All Initial SB" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW Allow any signed dm-verity volume and the initial superblock: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Prohibit execution from a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Allow only a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW Deploying Policies: ------------------- Deploying policies is simple. First sign a plain text policy, with a certificate that is present in the SYSTEM_TRUSTED_KEYRING of your test machine. Through openssl, the signing can be done via: openssl smime -sign -in "$MY_POLICY" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr -nodetach \ -out "$MY_POLICY.p7s" Then, simply cat the file into the IPE's "new_policy" securityfs node: cat "$MY_POLICY.p7s" > /sys/kernel/security/ipe/new_policy The policy should now be present under the policies/ subdirectory, under its "policy_name" attribute. 
The policy is now present in the kernel and can be marked as active, via the sysctl "ipe.active_policy": echo -n 1 > "/sys/kernel/security/ipe/$MY_POLICY_NAME/active" This will now mark the policy as active and the system will be enforcing $MY_POLICY_NAME. At any point the policy can be updated on the provision that the policy version to be deployed is greater than or equal to the running version (to prevent roll-back attacks). This update can be done by redirecting the file into the policy's "raw" node, under the policies subdirectory: cat "$MY_UPDATED_POLICY.p7s" > \ "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/raw" Additionally, policies can be deleted via the "del_policy" securityfs node. Simply write the name of the policy to be deleted to that node: echo -n 1 > "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/delete" There are two requirements to delete policies: 1. The policy being deleted must not be the active policy. 2. The policy being deleted must not be the boot policy. It's important to know above that the "echo" command will add a newline to the end of the input, and this will be considered as part of the filename. You can remove the newline via the -n parameter. NOTE: If a MAC LSM is enabled, the securityfs commands will require CAP_MAC_ADMIN. This is due to sysfs supporting fine-grained MAC attributes, while securityfs at the current moment does not. Properties: ------------------------------------ This initial patchset introducing IPE adds three properties: 'boot_verified', 'dmverity_signature' and 'dmverity_roothash'. boot_verified (CONFIG_IPE_BOOT_PROP): This property can be utilized for authorization of the first super-block that is mounted on the system, where IPE attempts to evaluate a file. Typically this is used for systems with an initramfs or other initial disk, where this is unmounted before the system becomes available, and is not covered by any other property. 
The format of this property is: boot_verified=(TRUE|FALSE) WARNING: This property will trust any disk where the first IPE evaluation occurs. If you do not have a startup disk that is unpacked and unmounted (like initramfs), then it will automatically trust the root filesystem and potentially overauthorize the entire disk. dmverity_roothash (CONFIG_IPE_DM_VERITY_ROOTHASH): This property can be utilized for authorization or revocation of specific dmverity volumes, identified via root hash. It has a dependency on the DM_VERITY module. The format of this property is: dmverity_roothash=<HashHexDigest> dmverity_signature (CONFIG_IPE_DM_VERITY_SIGNATURE): This property can be utilized for authorization of all dm-verity volumes that have a signed roothash that chains to the system trusted keyring. It has a dependency on the DM_VERITY_VERIFY_ROOTHASH_SIG config. The format of this property is: dmverity_signature=(TRUE|FALSE) Testing: ------------------------------------ A test suite is available (Appendix B) for ease of use. For manual instructions: Enable IPE through the following Kconfigs: CONFIG_SECURITY_IPE=y CONFIG_SECURITY_IPE_BOOT_POLICY="../AllowAllInitialSB.pol" CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH=y CONFIG_IPE_BOOT_PROP=y CONFIG_IPE_DM_VERITY_ROOTHASH=y CONFIG_IPE_DM_VERITY_SIGNATURE=y CONFIG_DM_VERITY=y CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="/path/to/my/cert/list.pem" Start a test system, that boots directly from the filesystem, without an initrd. I recommend testing in permissive mode until all tests pass, then switch to enforce to ensure behavior remains identical. boot_verified: If booted correctly, the filesystem mounted on / should be marked as boot_verified. Verify by turning on success auditing (sysctl ipe.success_audit=1), and run a binary. In the audit output, `prop_boot_verified` should be `TRUE`. 
To test denials, mount a temporary filesystem (mount -t tmpfs -o size=4M tmp tmp), and copy a binary (e.g. ls) to this new filesystem. Disable success auditing and attempt to run the file. The file should have an audit event, but be allowed to execute in permissive mode, and prop_boot_verified should be FALSE. dmverity_roothash: First, you must create a dm-verity volume. This can be done through squashfs-tools and veritysetup (provided by cryptsetup). Creating a squashfs volume: mksquashfs /path/to/directory/with/executable /path/to/output.squashfs Format the volume for use with dm-verity & save the root hash: output_rh=$(veritysetup format output.squashfs output.hashtree | \ tee verity_out.txt | awk "/Root hash/" | \ sed -E "s/Root hash:\s+//g") echo -n $output_rh > output.roothash Create two policies, filling in the appropriate fields below: Policy 1: policy_name="roothash-denial" policy_version=0.0.0 DEFAULT action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=DENY Policy 2: policy_name="roothash-allow" policy_version=0.0.0 DEFAULT action=ALLOW DEFAULT op=EXECUTE action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=ALLOW Deploy each policy, then mark the first, "roothash-denial" as active, per the "Deploying Policies" section of this cover letter. Mount the dm-verity volume: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` mount /dev/mapper/unverified /my/mount/point Attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_roothash=$output_rh action=DENY To test the second policy, perform the same steps, but this time, enable success auditing before running the executable. The success audit event should be a match against this rule: op=EXECUTE dmverity_roothash=$output_rh action=ALLOW dmverity_signature: Follow the setup steps for dmverity_roothash. 
Sign the roothash via: openssl smime -sign -in "output.roothash" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr \ -out "output.p7s" Create a policy: policy_name="verified" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_verified=TRUE action=ALLOW Deploy the policy, and mark as active, per the "Deploying Policies" section of this cover letter. Mount the dm-verity volume with verification: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` --root-hash-signature=output.p7s mount /dev/mapper/unverified /my/mount/point NOTE: The --root-hash-signature option was introduced in veritysetup 2.3.0 Turn on success auditing and attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_verified=TRUE action=ALLOW To test denials, mount the dm-verity volume the same way as the "dmverity_roothash" section, and attempt to execute a binary. Failure should occur. Documentation: ------------------------------------ Full documentation is available on github in IPE's master repository (Appendix A). This is intended to be an exhaustive source of documentation around IPE. Additionally, there is higher level documentation in the admin-guide. Technical diagrams are available here: http://microsoft.github.io/ipe/technical/diagrams/ Known Gaps: ------------------------------------ IPE has two known gaps: 1. IPE cannot verify the integrity of anonymous executable memory, such as the trampolines created by gcc closures and libffi, or JIT'd code. Unfortunately, as this is dynamically generated code, there is no way for IPE to detect that this code has not been tampered with in transition from where it was built, to where it is running. As a result, IPE is incapable of tackling this problem for dynamically generated code. 
However, there is a patch series being prepared that addresses this problem for libffi and gcc closures by implementing a safer kernel trampoline API. 2. IPE cannot verify the integrity of interpreted languages' programs when these scripts are invoked via `<interpreter> <file>`. This is because of the way interpreters execute these files: the scripts themselves are not evaluated as executable code through one of IPE's hooks. Interpreters can be enlightened to the usage of IPE by trying to mmap a file into executable memory (+X), after opening the file and responding to the error code appropriately. This also applies to included files, or high value files, such as configuration files of critical system components. This specific gap is planned on being addressed within IPE. For more information on how we plan to address this gap, please see the Future Development section, below. Future Development: ------------------------------------ Support for filtering signatures by specific certificates. In this case, our "dmverity_signature" (or a separate property) can be set to a specific certificate declared in IPE's policy, allowing for more controlled use-cases determined by a user's PKI structure. Support for integrity verification for general file reads. This addresses the script interpreter issue indicated in the "Known Gaps" section, as these script files are typically opened with O_RDONLY. We are evaluating whether to do this by comparing the original userland filepath passed into the open syscall, thereby allowing existing callers to take advantage without any code changes; the alternate design is to extend the new openat2(2) syscall, with a new flag, tentatively called "O_VERIFY". While the second option requires a code change for all the interpreters, frameworks and languages that wish to leverage it, it is a wholly cleaner implementation in the kernel. 
For interpreters specifically, the O_MAYEXEC patch series published by Mickaël Salaün[1] is a similar implementation to the O_VERIFY idea described above. Onboarding IPE's test suite to KernelCI. Currently we are developing a test suite in the same vein as SELinux's test suite. Once development of the test suite is complete, and provided IPE is accepted, we intend to onboard this test suite onto KernelCI. Hardened resistance against roll-back attacks. Currently there exists a window of opportunity between user-mode setup and the user-policy being deployed, where a prior user-policy can be loaded, that is potentially insecure. However, with a kernel update, you can revise the boot policy's version to be the same version as the latest policy, closing this window. In the future, I would like to close this window of opportunity without a kernel update, using some persistent storage mechanism. Open Issues: ------------ For linux-audit/integrity folks: 1. Introduction of new audit definitions in the kernel integrity range - is this preferred, as opposed to reusing definitions with existing IMA definitions? TODOs: ------ linux-audit changes to support the new audit events. Appendix: ------------------------------------ A. IPE Github Repository: https://github.com/microsoft/ipe Hosted Documentation: https://microsoft.github.io/ipe B. IPE Users' Guide: Documentation/admin-guide/LSM/ipe.rst C. IPE Test Suite: *TBA* (under development) References: ------------------------------------ 1. https://lore.kernel.org/linux-integrity/20200505153156.925111-1-mic@digik... Changelog: ------------------------------------ v1: Introduced v2: Split the second patch of the previous series into two. Minor corrections in the cover-letter and documentation comments regarding CAP_MAC_ADMIN checks in IPE. v3: Address various comments by Jann Horn. Highlights: Switch various audit allocators to GFP_KERNEL. Utilize rcu_access_pointer() in various locations. 
Strip out the caching system for properties Strip comments from headers Move functions around in patches Remove kernel command line parameters Reconcile the race condition on the delete node for policy by expanding the policy critical section. Address a few comments by Jonathan Corbet around the documentation pages for IPE. Fix an issue with the initialization of IPE policy with a "-0" version, caused by not initializing the hlist entries before freeing. v4: Address a concern around IPE's behavior with unknown syntax. Specifically, make any unknown syntax a fatal error instead of a warning, as suggested by Mickaël Salaün. Introduce a new securityfs node, $securityfs/ipe/property_config, which provides a listing of what properties are enabled by the kernel and their versions. This allows usermode to predict what policies should be allowed. Strip some comments from c files that I missed. Clarify some documentation comments around 'boot_verified'. While this currently does not functionally change the property itself, the distinction is important when IPE can enforce verified reads. Additionally, 'KERNEL_READ' was omitted from the documentation. This has been corrected. Change SecurityFS and SHA1 to a reverse dependency. Update the cover-letter with the updated behavior of unknown syntax. Remove all sysctls, making an equivalent function in securityfs. Rework the active/delete mechanism to be a node under the policy in $securityfs/ipe/policies. The kernel command line parameters ipe.enforce and ipe.success_audit have returned as this functionality is no longer exposed through sysfs. v5: Correct some grammatical errors reported by Randy Dunlap. Fix some warnings reported by kernel test bot. Change convention around security_bdev_setsecurity. -ENOSYS is now expected if an LSM does not implement a particular @name, as suggested by Casey Schaufler. 
Minor string corrections related to the move from sysfs to securityfs Correct a spelling of an #ifdef for the permissive argument. Add the kernel parameters re-added to the documentation. Integrity Policy Enforcement LSM (IPE) Overview: ------------------------------------ IPE is a Linux Security Module which allows for a configurable policy to enforce integrity requirements on the whole system. It attempts to solve the issue of Code Integrity: that any code being executed (or files being read), are identical to the version that was built by a trusted source. The type of system for which IPE is designed for use is an embedded device with a specific purpose (e.g. network firewall device in a data center), where all software and configuration is built and provisioned by the owner. Specifically, a system which leverages IPE is not intended for general purpose computing and does not utilize any software or configuration built by a third party. An ideal system to leverage IPE has both mutable and immutable components, however, all binary executable code is immutable. The scope of IPE is constrained to the OS. It is assumed that platform firmware verifies the kernel and optionally the root filesystem (e.g. via U-Boot verified boot). IPE then utilizes LSM hooks to enforce a flexible, kernel-resident integrity verification policy. IPE differs from other LSMs which provide integrity checking (for instance, IMA), as it has no dependency on the filesystem metadata itself. The attributes that IPE checks are deterministic properties that exist solely in the kernel. Additionally, IPE provides no additional mechanisms of verifying these files (e.g. IMA Signatures) - all of the attributes of verifying files are existing features within the kernel, such as dm-verity or fsverity. 
IPE provides a policy that allows owners of the system to easily specify integrity requirements and uses dm-verity signatures to simplify the authentication of allowed objects like authorized code and data. IPE supports two modes, permissive (similar to SELinux's permissive mode) and enforce. Permissive mode performs the same checks, and logs policy violations as enforce mode, but will not enforce the policy. This allows users to test policies before enforcing them. The default mode is enforce, and can be changed via the kernel commandline parameter `ipe.enforce=(0|1)`, or the securityfs node `/sys/kernel/security/ipe/enforce`. The ability to switch modes can be compiled out of the LSM via setting the config CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH to N. IPE additionally supports success auditing. When enabled, all events that pass IPE policy and are not blocked will emit an audit event. This is disabled by default, and can be enabled via the kernel commandline `ipe.success_audit=(0|1)` or the securityfs node `/sys/kernel/security/ipe/success_audit`. Policies can be staged at runtime through securityfs and activated through sysfs. Please see the Deploying Policies section of this cover letter for more information. The IPE LSM is compiled under CONFIG_SECURITY_IPE. Policy: ------------------------------------ IPE policy is designed to be both forward compatible and backwards compatible. There is one required line, at the top of the policy, indicating the policy name, and the policy version, for instance: policy_name="Ex Policy" policy_version=0.0.0 The policy version indicates the current version of the policy (NOT the policy syntax version). This is used to prevent roll-back of policy to potentially insecure previous versions of the policy. The next portion of IPE policy, are rules. Rules are formed by key=value pairs, known as properties. 
IPE rules require two properties: "action", which determines what IPE does when it encounters a match against the policy, and "op", which determines when that rule should be evaluated. Thus, a minimal rule is: op=EXECUTE action=ALLOW This example will allow any execution. Additional properties are used to restrict attributes about the files being evaluated. These properties are intended to be deterministic attributes that are resident in the kernel. Available properties for IPE are described in the properties section of this cover-letter, the repository available in Appendix A, and the kernel documentation page. Order does not matter for the rule's properties - they can be listed in any order, however it is encouraged to have the "op" property be first, and the "action" property be last, for readability. Additionally, rules are evaluated top-to-bottom. As a result, any revocation rules, or denies should be placed early in the file to ensure that these rules are evaluated before a rule with "action=ALLOW" is hit. Any unknown syntax in IPE policy will result in a fatal error when parsing the policy. User mode can interrogate the kernel to understand what properties are enabled, and their associated versions, through the securityfs node, $securityfs/ipe/property_config, which will return a string of form: key1=version1 key2=version2 . . . keyN=versionN User-mode should correlate these versions with the supported values identified in the documentation to determine whether a policy should be accepted by the system. Additionally, a DEFAULT operation must be set for all understood operations within IPE. For policies to remain completely forwards compatible, it is recommended that users add a "DEFAULT action=ALLOW" and override the defaults on a per-operation basis. For more information about the policy syntax, please see Appendix A or the kernel documentation page. Early Usermode Protection: -------------------------- IPE can be provided with a policy at startup to load and enforce. 
This is intended to be a minimal policy to get the system to a state where userland is setup and ready to receive commands, at which point a policy can be deployed via securityfs. This "boot policy" can be specified via the config, SECURITY_IPE_BOOT_POLICY, which accepts a path to a plain-text version of the IPE policy to apply. This policy will be compiled into the kernel. If not specified, IPE will be disabled until a policy is deployed and activated through the method above. Policy Examples: ------------------------------------ Allow all: policy_name="Allow All" policy_version=0.0.0 DEFAULT action=ALLOW Allow only initial superblock: policy_name="Allow All Initial SB" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW Allow any signed dm-verity volume and the initial superblock: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Prohibit execution from a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Allow only a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW Deploying Policies: ------------------- Deploying policies is simple. First sign a plain text policy, with a certificate that is present in the SYSTEM_TRUSTED_KEYRING of your test machine. 
Through openssl, the signing can be done via: openssl smime -sign -in "$MY_POLICY" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr -nodetach \ -out "$MY_POLICY.p7s" Then, simply cat the file into the IPE's "new_policy" securityfs node: cat "$MY_POLICY.p7s" > /sys/kernel/security/ipe/new_policy The policy should now be present under the policies/ subdirectory, under its "policy_name" attribute. The policy is now present in the kernel and can be marked as active, via the sysctl "ipe.active_policy": echo -n 1 > "/sys/kernel/security/ipe/$MY_POLICY_NAME/active" This will now mark the policy as active and the system will be enforcing $MY_POLICY_NAME. At any point the policy can be updated on the provision that the policy version to be deployed is greater than or equal to the running version (to prevent roll-back attacks). This update can be done by redirecting the file into the policy's "raw" node, under the policies subdirectory: cat "$MY_UPDATED_POLICY.p7s" > \ "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/raw" Additionally, policies can be deleted via the "del_policy" securityfs node. Simply write the name of the policy to be deleted to that node: echo -n 1 > "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/delete" There are two requirements to delete policies: 1. The policy being deleted must not be the active policy. 2. The policy being deleted must not be the boot policy. It's important to know above that the "echo" command will add a newline to the end of the input, and this will be considered as part of the filename. You can remove the newline via the -n parameter. NOTE: If a MAC LSM is enabled, the securityfs commands will require CAP_MAC_ADMIN. This is due to sysfs supporting fine-grained MAC attributes, while securityfs at the current moment does not. 
Properties: ------------------------------------ This initial patchset introducing IPE adds three properties: 'boot_verified', 'dmverity_signature' and 'dmverity_roothash'. boot_verified (CONFIG_IPE_BOOT_PROP): This property can be utilized for authorization of the first super-block that is mounted on the system, where IPE attempts to evaluate a file. Typically this is used for systems with an initramfs or other initial disk, where this is unmounted before the system becomes available, and is not covered by any other property. The format of this property is: boot_verified=(TRUE|FALSE) WARNING: This property will trust any disk where the first IPE evaluation occurs. If you do not have a startup disk that is unpacked and unmounted (like initramfs), then it will automatically trust the root filesystem and potentially overauthorize the entire disk. dmverity_roothash (CONFIG_IPE_DM_VERITY_ROOTHASH): This property can be utilized for authorization or revocation of specific dmverity volumes, identified via root hash. It has a dependency on the DM_VERITY module. The format of this property is: dmverity_roothash=<HashHexDigest> dmverity_signature (CONFIG_IPE_DM_VERITY_SIGNATURE): This property can be utilized for authorization of all dm-verity volumes that have a signed roothash that chains to the system trusted keyring. It has a dependency on the DM_VERITY_VERIFY_ROOTHASH_SIG config. The format of this property is: dmverity_signature=(TRUE|FALSE) Testing: ------------------------------------ A test suite is available (Appendix B) for ease of use. 
For manual instructions: Enable IPE through the following Kconfigs: CONFIG_SECURITY_IPE=y CONFIG_SECURITY_IPE_BOOT_POLICY="../AllowAllInitialSB.pol" CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH=y CONFIG_IPE_BOOT_PROP=y CONFIG_IPE_DM_VERITY_ROOTHASH=y CONFIG_IPE_DM_VERITY_SIGNATURE=y CONFIG_DM_VERITY=y CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="/path/to/my/cert/list.pem" Start a test system, that boots directly from the filesystem, without an initrd. I recommend testing in permissive mode until all tests pass, then switch to enforce to ensure behavior remains identical. boot_verified: If booted correctly, the filesystem mounted on / should be marked as boot_verified. Verify by turning on success auditing (sysctl ipe.success_audit=1), and run a binary. In the audit output, `prop_boot_verified` should be `TRUE`. To test denials, mount a temporary filesystem (mount -t tmpfs -o size=4M tmp tmp), and copy a binary (e.g. ls) to this new filesystem. Disable success auditing and attempt to run the file. The file should have an audit event, but be allowed to execute in permissive mode, and prop_boot_verified should be FALSE. dmverity_roothash: First, you must create a dm-verity volume. This can be done through squashfs-tools and veritysetup (provided by cryptsetup). 
Creating a squashfs volume: mksquashfs /path/to/directory/with/executable /path/to/output.squashfs Format the volume for use with dm-verity & save the root hash: output_rh=$(veritysetup format output.squashfs output.hashtree | \ tee verity_out.txt | awk "/Root hash/" | \ sed -E "s/Root hash:\s+//g") echo -n $output_rh > output.roothash Create two policies, filling in the appropriate fields below: Policy 1: policy_name="roothash-denial" policy_version=0.0.0 DEFAULT action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=DENY Policy 2: policy_name="roothash-allow" policy_version=0.0.0 DEFAULT action=ALLOW DEFAULT op=EXECUTE action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=ALLOW Deploy each policy, then mark the first, "roothash-denial" as active, per the "Deploying Policies" section of this cover letter. Mount the dm-verity volume: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` mount /dev/mapper/unverified /my/mount/point Attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_roothash=$output_rh action=DENY To test the second policy, perform the same steps, but this time, enable success auditing before running the executable. The success audit event should be a match against this rule: op=EXECUTE dmverity_roothash=$output_rh action=ALLOW dmverity_signature: Follow the setup steps for dmverity_roothash. Sign the roothash via: openssl smime -sign -in "output.roothash" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr \ -out "output.p7s" Create a policy: policy_name="verified" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_verified=TRUE action=ALLOW Deploy the policy, and mark as active, per the "Deploying Policies" section of this cover letter. 
Mount the dm-verity volume with verification: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` --root-hash-signature=output.p7s mount /dev/mapper/unverified /my/mount/point NOTE: The --root-hash-signature option was introduced in veritysetup 2.3.0 Turn on success auditing and attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_verified=TRUE action=ALLOW To test denials, mount the dm-verity volume the same way as the "dmverity_roothash" section, and attempt to execute a binary. Failure should occur. Documentation: ------------------------------------ Full documentation is available on github in IPE's master repository (Appendix A). This is intended to be an exhaustive source of documentation around IPE. Additionally, there is higher level documentation in the admin-guide. Technical diagrams are available here: http://microsoft.github.io/ipe/technical/diagrams/ Known Gaps: ------------------------------------ IPE has two known gaps: 1. IPE cannot verify the integrity of anonymous executable memory, such as the trampolines created by gcc closures and libffi, or JIT'd code. Unfortunately, as this is dynamically generated code, there is no way for IPE to detect that this code has not been tampered with in transition from where it was built, to where it is running. As a result, IPE is incapable of tackling this problem for dynamically generated code. However, there is a patch series being prepared that addresses this problem for libffi and gcc closures by implementing a safer kernel trampoline API. 2. IPE cannot verify the integrity of interpreted languages' programs when these scripts are invoked via `<interpreter> <file>`. This is because the way interpreters execute these files, the scripts themselves are not evaluated as executable code through one of IPE's hooks. 
Interpreters can be enlightened to the usage of IPE by trying to mmap a file into executable memory (+X), after opening the file and responding to the error code appropriately. This also applies to included files, or high value files, such as configuration files of critical system components. This specific gap is planned on being addressed within IPE. For more information on how we plan to address this gap, please see the Future Development section, below. Future Development: ------------------------------------ Support for filtering signatures by specific certificates. In this case, our "dmverity_signature" (or a separate property) can be set to a specific certificate declared in IPE's policy, allowing for more controlled use-cases determined by a user's PKI structure. Support for integrity verification for general file reads. This addresses the script interpreter issue indicated in the "Known Gaps" section, as these script files are typically opened with O_RDONLY. We are evaluating whether to do this by comparing the original userland filepath passed into the open syscall, thereby allowing existing callers to take advantage without any code changes; the alternate design is to extend the new openat2(2) syscall, with a new flag, tentatively called "O_VERIFY". While the second option requires a code change for all the interpreters, frameworks and languages that wish to leverage it, it is a wholly cleaner implementation in the kernel. For interpreters specifically, the O_MAYEXEC patch series published by Mickaël Salaün[1] is a similar implementation to the O_VERIFY idea described above. Onboarding IPE's test suite to KernelCI. Currently we are developing a test suite in the same vein as SELinux's test suite. Once development of the test suite is complete, and provided IPE is accepted, we intend to onboard this test suite onto KernelCI. Hardened resistance against roll-back attacks. 
Currently there exists a window of opportunity between user-mode setup and the user-policy being deployed, where a prior user-policy can be loaded, that is potentially insecure. However, with a kernel update, you can revise the boot policy's version to be the same version as the latest policy, closing this window. In the future, I would like to close this window of opportunity without a kernel update, using some persistent storage mechanism. Open Issues: ------------ For linux-audit/integrity folks: 1. Introduction of new audit definitions in the kernel integrity range - is this preferred, as opposed to reusing definitions with existing IMA definitions? TODOs: ------ linux-audit changes to support the new audit events. Appendix: ------------------------------------ A. IPE Github Repository: https://github.com/microsoft/ipe Hosted Documentation: https://microsoft.github.io/ipe B. IPE Users' Guide: Documentation/admin-guide/LSM/ipe.rst C. IPE Test Suite: *TBA* (under development) References: ------------------------------------ 1. https://lore.kernel.org/linux-integrity/20200505153156.925111-1-mic@digik... Changelog: ------------------------------------ v1: Introduced v2: Split the second patch of the previous series into two. Minor corrections in the cover-letter and documentation comments regarding CAP_MAC_ADMIN checks in IPE. v3: Address various comments by Jann Horn. Highlights: Switch various audit allocators to GFP_KERNEL. Utilize rcu_access_pointer() in various locations. Strip out the caching system for properties Strip comments from headers Move functions around in patches Remove kernel command line parameters Reconcile the race condition on the delete node for policy by expanding the policy critical section. Address a few comments by Jonathan Corbet around the documentation pages for IPE. Fix an issue with the initialization of IPE policy with a "-0" version, caused by not initializing the hlist entries before freeing. 
v4: Address a concern around IPE's behavior with unknown syntax. Specifically, make any unknown syntax a fatal error instead of a warning, as suggested by Mickaël Salaün. Introduce a new securityfs node, $securityfs/ipe/property_config, which provides a listing of what properties are enabled by the kernel and their versions. This allows usermode to predict what policies should be allowed. Strip some comments from c files that I missed. Clarify some documentation comments around 'boot_verified'. While this currently does not functionally change the property itself, the distinction is important when IPE can enforce verified reads. Additionally, 'KERNEL_READ' was omitted from the documentation. This has been corrected. Change SecurityFS and SHA1 to a reverse dependency. Update the cover-letter with the updated behavior of unknown syntax. Remove all sysctls, making an equivalent function in securityfs. Rework the active/delete mechanism to be a node under the policy in $securityfs/ipe/policies. The kernel command line parameters ipe.enforce and ipe.success_audit have returned as this functionality is no longer exposed through sysfs. v5: Correct some grammatical errors reported by Randy Dunlap. Fix some warnings reported by kernel test bot. Change convention around security_bdev_setsecurity. -ENOSYS is now expected if an LSM does not implement a particular @name, as suggested by Casey Schaufler. Minor string corrections related to the move from sysfs to securityfs Correct a spelling of an #ifdef for the permissive argument. Add the kernel parameters re-added to the documentation. Fix a minor bug where the mode being audited on permissive switch was the original mode, not the mode being swapped to. Cleanup doc comments, fix some whitespace alignment issues. 
Deven Bowers (11): scripts: add ipe tooling to generate boot policy security: add ipe lsm evaluation loop and audit system security: add ipe lsm policy parser and policy loading ipe: add property for trust of boot volume fs: add security blob and hooks for block_device dm-verity: move signature check after tree validation dm-verity: add bdev_setsecurity hook for dm-verity signature ipe: add property for signed dmverity volumes dm-verity: add bdev_setsecurity hook for root-hash documentation: add ipe documentation cleanup: uapi/linux/audit.h Documentation/admin-guide/LSM/index.rst | 1 + Documentation/admin-guide/LSM/ipe.rst | 508 +++++++ .../admin-guide/kernel-parameters.txt | 12 + MAINTAINERS | 8 + drivers/md/dm-verity-target.c | 52 +- drivers/md/dm-verity-verify-sig.c | 147 +- drivers/md/dm-verity-verify-sig.h | 24 +- drivers/md/dm-verity.h | 2 +- fs/block_dev.c | 8 + include/linux/device-mapper.h | 3 + include/linux/fs.h | 1 + include/linux/lsm_hook_defs.h | 5 + include/linux/lsm_hooks.h | 12 + include/linux/security.h | 22 + include/uapi/linux/audit.h | 36 +- scripts/Makefile | 1 + scripts/ipe/Makefile | 2 + scripts/ipe/polgen/.gitignore | 1 + scripts/ipe/polgen/Makefile | 7 + scripts/ipe/polgen/polgen.c | 136 ++ security/Kconfig | 12 +- security/Makefile | 2 + security/ipe/.gitignore | 2 + security/ipe/Kconfig | 48 + security/ipe/Makefile | 33 + security/ipe/ipe-audit.c | 303 ++++ security/ipe/ipe-audit.h | 24 + security/ipe/ipe-blobs.c | 95 ++ security/ipe/ipe-blobs.h | 18 + security/ipe/ipe-engine.c | 213 +++ security/ipe/ipe-engine.h | 49 + security/ipe/ipe-hooks.c | 169 +++ security/ipe/ipe-hooks.h | 70 + security/ipe/ipe-parse.c | 889 +++++++++++ security/ipe/ipe-parse.h | 17 + security/ipe/ipe-pin.c | 93 ++ security/ipe/ipe-pin.h | 36 + security/ipe/ipe-policy.c | 149 ++ security/ipe/ipe-policy.h | 69 + security/ipe/ipe-prop-internal.h | 49 + security/ipe/ipe-property.c | 143 ++ security/ipe/ipe-property.h | 100 ++ security/ipe/ipe-secfs.c | 1309 
+++++++++++++++++ security/ipe/ipe-secfs.h | 14 + security/ipe/ipe.c | 115 ++ security/ipe/ipe.h | 22 + security/ipe/properties/Kconfig | 36 + security/ipe/properties/Makefile | 13 + security/ipe/properties/boot-verified.c | 82 ++ security/ipe/properties/dmverity-roothash.c | 153 ++ security/ipe/properties/dmverity-signature.c | 82 ++ security/ipe/properties/prop-entry.h | 38 + security/ipe/utility.h | 32 + security/security.c | 74 + 54 files changed, 5443 insertions(+), 98 deletions(-) create mode 100644 Documentation/admin-guide/LSM/ipe.rst create mode 100644 scripts/ipe/Makefile create mode 100644 scripts/ipe/polgen/.gitignore create mode 100644 scripts/ipe/polgen/Makefile create mode 100644 scripts/ipe/polgen/polgen.c create mode 100644 security/ipe/.gitignore create mode 100644 security/ipe/Kconfig create mode 100644 security/ipe/Makefile create mode 100644 security/ipe/ipe-audit.c create mode 100644 security/ipe/ipe-audit.h create mode 100644 security/ipe/ipe-blobs.c create mode 100644 security/ipe/ipe-blobs.h create mode 100644 security/ipe/ipe-engine.c create mode 100644 security/ipe/ipe-engine.h create mode 100644 security/ipe/ipe-hooks.c create mode 100644 security/ipe/ipe-hooks.h create mode 100644 security/ipe/ipe-parse.c create mode 100644 security/ipe/ipe-parse.h create mode 100644 security/ipe/ipe-pin.c create mode 100644 security/ipe/ipe-pin.h create mode 100644 security/ipe/ipe-policy.c create mode 100644 security/ipe/ipe-policy.h create mode 100644 security/ipe/ipe-prop-internal.h create mode 100644 security/ipe/ipe-property.c create mode 100644 security/ipe/ipe-property.h create mode 100644 security/ipe/ipe-secfs.c create mode 100644 security/ipe/ipe-secfs.h create mode 100644 security/ipe/ipe.c create mode 100644 security/ipe/ipe.h create mode 100644 security/ipe/properties/Kconfig create mode 100644 security/ipe/properties/Makefile create mode 100644 security/ipe/properties/boot-verified.c create mode 100644 
security/ipe/properties/dmverity-roothash.c create mode 100644 security/ipe/properties/dmverity-signature.c create mode 100644 security/ipe/properties/prop-entry.h create mode 100644 security/ipe/utility.h -- 2.27.0

4 years, 10 months

10
60
0 / 0

[PATCH v19 00/23] LSM: Module stacking for AppArmor

by Casey Schaufler

This patchset provides the changes required for the AppArmor security module to stack safely with any other. v19: Rebase to 5.8-rc6 Incorporate feedback from v18 - Revert UDS SO_PEERSEC implementation to use lsmblobs directly, rather than allocating as needed. The correct treatment of out-of-memory conditions in the latter case is difficult to define. (patch 0005) - Use a size_t in append_ctx() (patch 0021) - Fix a memory leak when creating compound contexts. (patch 0021) Fix build error when CONFIG_SECURITY isn't set (patch 0013) Fix build error when CONFIG_SECURITY isn't set (patch 0020) Fix build error when CONFIG_SECURITY isn't set (patch 0021) v18: Rebase to 5.8-rc3 Incorporate feedback from v17 - Null pointer checking in UDS (patch 0005) Match changes in IMA code (patch 0012) Fix the behavior of LSM context supplemental audit records so that there's always exactly one when it's appropriate for there to be one. This is a substantial change that requires extension of the audit_context beyond syscall events. (patch 0020) v17: Rebase to 5.7-rc4 v16: Rebase to 5.6 Incorporate feedback from v15 - Thanks Stephen, Mimi and Paul - Generally improve commit messages WRT scaffolding - Comment ima_lsm_isset() (patch 0002) - Some question may remain on IMA warning (patch 0002) - Mark lsm_slot as __lsm_ro_after_init not __init_data (patch 0002) - Change name of lsmblob variable in ima_match_rules() (patch 0003) - Instead of putting a struct lsmblob into the unix_skb_parms structure put a pointer to an allocated instance. There is currently only space for 5 u32's in unix_skb_parms and it is likely to get even tighter. Fortunately, the lifecycle management of the allocated lsmblob is simple. (patch 0005) - Dropped Acks due to the above change (patch 0005) - Improved commentary on secmark labeling scaffolding. (patch 0006) - Reduced secmark related labeling scaffolding. 
(patch 0006) - Replace use of the zeroth entry of an lsmblob in scaffolding with a function lsmblob_value() to hopefully make it less obscure. (patch 0006) - Convert security_secmark_relabel_packet to use lsmblob as this reduces much of the most contentious scaffolding. (patch 0006) - Dropped Acks due to the above change (patch 0006) - Added BUILD_BUG_ON() for CIPSO tag 6. (patch 0018) - Reworked audit subject information. Instead of adding fields in the middle of existing records add a new record to the event. When a separate record is required use subj="?". (patch 0020) - Dropped Acks due to the above change (patch 0020) - Reworked audit object information. Instead of adding fields in the middle of existing records add a new record to the event. When a separate record is required use obj="?". (patch 0021) - Dropped Acks due to the above change (patch 0021) - Enhanced documentation (patch 0022) - Removed unnecessary error code check in security_getprocattr() (patch 0021) v15: Rebase to 5.6-rc1 - Revise IMA data use (patch 0002) Incorporate feedback from v14 - Fix lockdown module registration naming (patch 0002) - Revise how /proc/self/attr/context is gathered. (patch 0022) - Revise access modes on /proc/self/attr/context. (patch 0022) - Revise documentation on LSM external interfaces. (patch 0022) v14: Rebase to 5.5-rc5 Incorporate feedback from v13 - Use an array of audit rules (patch 0002) - Significant change, removed Acks (patch 0002) - Remove unneeded include (patch 0013) - Use context.len correctly (patch 0015) - Reorder code to be more sensible (patch 0016) - Drop SO_PEERCONTEXT as it's not needed yet (patch 0023) v13: Rebase to 5.5-rc2 Incorporate feedback from v12 - Print lsmblob size with %z (Patch 0002) - Convert lockdown LSM initialization. 
(Patch 0002) - Restore error check in nft_secmark_compute_secid (Patch 0006) - Correct blob scaffolding in ima_must_appraise() (Patch 0009) - Make security_setprocattr() clearer (Patch 0013) - Use lsm_task_display more widely (Patch 0013) - Use passed size in lsmcontext_init() (Patch 0014) - Don't add a smack_release_secctx() hook (Patch 0014) - Don't print warning in security_release_secctx() (Patch 0014) - Don't duplicate the label in nfs4_label_init_security() (Patch 0016) - Remove reviewed-by as code has significant change (Patch 0016) - Send the entire lsmblob for Tag 6 (Patch 0019) - Fix description of socket_getpeersec_stream parameters (Patch 0023) - Retain LSMBLOB_FIRST. What was I thinking? (Patch 0023) - Add compound context to LSM documentation (Patch 0023) v12: Rebase to 5.5-rc1 Fixed a couple of incorrect contractions in the text. v11: Rebase to 5.4-rc6 Incorporate feedback from v10 - Disambiguate reading /proc/.../attr/display by restricting all use of the interface to the current process. - Fix a merge error in AppArmor's display attribute check v10: Ask the security modules if the display can be changed. v9: There is no version 9 v8: Incorporate feedback from v7 - Minor clean-up in display value management - refactor "compound" context creation to use a common append_ctx() function. v7: Incorporate feedback from v6 - Make setting the display a privileged operation. The availability of compound contexts reduces the need for setting the display. v6: Incorporate feedback from v5 - Add subj_<lsm>= and obj_<lsm>= fields to audit records - Add /proc/.../attr/context to get the full context in lsmname\0value\0... format as suggested by Simon McVittie - Add SO_PEERCONTEXT for getsockopt() to get the full context in the same format, also suggested by Simon McVittie. - Add /sys/kernel/security/lsm_display_default to provide the display default value. 
v5: Incorporate feedback from v4 - Initialize the lsmcontext in security_secid_to_secctx() - Clear the lsmcontext in all security_release_secctx() cases - Don't use the "display" on strictly internal context interfaces. - The SELinux binder hooks check for cases where the context "display" isn't compatible with SELinux. v4: Incorporate feedback from v3 - Mark new lsm_<blob>_alloc functions static - Replace the lsm and slot fields of the security_hook_list with a pointer to a LSM allocated lsm_id structure. The LSM identifies if it needs a slot explicitly. Use the lsm_id rather than make security_add_hooks return the slot value. - Validate slot values used in security.c - Reworked the "display" process attribute handling so that it works right and doesn't use goofy list processing. - fix display value check in dentry_init_security - Replace audit_log of secids with '?' instead of deleting the audit log v3: Incorporate feedback from v2 - Make lsmblob parameter and variable names more meaningful, changing "le" and "l" to "blob". - Improve consistency of constant naming. - Do more sanity checking during LSM initialization. - Be a bit clearer about what is temporary scaffolding. - Rather than clutter security_getpeersec_dgram with otherwise unnecessary checks remove the apparmor stub, which does nothing useful. Patch 0001 moves management of the sock security blob from the individual modules to the infrastructure. Patches 0002-0012 replace system use of a "secid" with a structure "lsmblob" containing information from the security modules to be held and reused later. At this point lsmblob contains an array of u32 secids, one "slot" for each of the security modules compiled into the kernel that used secids. A "slot" is allocated when a security module requests one. The infrastructure is changed to use the slot number to pass the correct secid to or from the security module hooks. It is important that the lsmblob be a fixed size entity that does not have to be allocated. 
Several of the places where it is used would have performance and/or locking issues with dynamic allocation. Patch 0013 provides a mechanism for a process to identify which security module's hooks should be used when displaying or converting a security context string. A new interface /proc/self/attr/display contains the name of the security module to show. Reading from this file will present the name of the module, while writing to it will set the value. Only names of active security modules are accepted. Internally, the name is translated to the appropriate "slot" number for the module which is then stored in the task security blob. Setting the display requires that all modules using the /proc interfaces allow the transition. The "display" of other processes can be neither read nor written. All suggested cases for reading the display of a different process have race conditions. Patch 0014 Starts the process of changing how a security context is represented. Since it is possible for a security context to have been generated by more than one security module it is now necessary to note which module created a security context so that the correct "release" hook can be called. There are several places where the module that created a security context cannot be inferred. This is achieved by introducing a "lsmcontext" structure which contains the context string, its length and the "slot" number of the security module that created it. The security_release_secctx() interface is changed, replacing the (string,len) pointer pair with a lsmcontext pointer. Patches 0015-0017 convert the security interfaces from (string,len) pointer pairs to a lsmcontext pointer. The slot number identifying the creating module is added by the infrastructure. Where the security context is stored for extended periods the data type is changed. The Netlabel code is converted to save lsmblob structures instead of secids in Patch 0018. 
Patch 0019 adds checks to the binder hooks which verify that both ends of a transaction use the same "display". Patches 0020-0021 add additional audit records for subject and object LSM data when there are multiple security modules with such data. The AUDIT_MAC_TASK_CONTEXTS record is used in conjunction with a "subj=?" field to identify the subject data. The AUDIT_MAC_OBJ_CONTEXTS record is used in conjunction with a "obj=?" field to identify the object data. The AUDIT_MAC_TASK_CONTEXTS record identifies the security module with the data: "subj_selinux=xyz_t subj_apparmor=abc". The AUDIT_MAC_OBJ_CONTEXTS record identifies the security module with the data: "obj_selinux=xyz_t obj_apparmor=abc". While AUDIT_MAC_TASK_CONTEXTS records will always contain an entry for each possible security module, AUDIT_MAC_OBJ_CONTEXTS records will only contain entries for security modules for which the object in question has data. Patch 0022 adds a new interface for getting the compound security contexts. /proc/self/attr/context Finally, with all interference on the AppArmor hooks removed, Patch 0023 removes the exclusive bit from AppArmor. An unnecessary stub hook was also removed. The Ubuntu project is using an earlier version of this patchset in their distribution to enable stacking for containers. Performance measurements to date have the change within the "noise". The sockperf and dbench results are on the order of 0.2% to 0.8% difference, with better performance being as common as worse. The benchmarks were run with AppArmor and Smack on Ubuntu. https://github.com/cschaufler/lsm-stacking.git#stack-5.8-rc6-a-v19 Signed-off-by: Casey Schaufler <casey(a)schaufler-ca.com> ---

4 years, 11 months

4
45
0 / 0

httpd auid = -1

by Todd Heberlein

I’ve noticed that the httpd process on a CentOS 7.7 system I am working with is running with an Audit ID of -1. Example ID values are: auid=4294967295 uid=48 gid=48 ... So if I use the standard filter "-F auid!=-1" in the audit rules I do not see httpd activity. Is this common? How do I change the auid to something else, so I can capture the httpd activity in the audit log? Example audit line: type=SYSCALL msg=audit(1596065566.721:31357): arch=c000003e syscall=2 success=yes exit=15 a0=55a0a2d9b3c0 a1=80000 a2=0 a3=7ffe5d4d6720 items=1 ppid=1130 pid=1253 auid=4294967295 uid=48 gid=48 euid=48 suid=48 fsuid=48 egid=48 sgid=48 fsgid=48 tty=(none) ses=4294967295 comm="httpd" exe="/usr/sbin/httpd" key=(null) Thanks, Todd

4 years, 11 months

2
3
0 / 0

[RFC PATCH v6 00/11] Integrity Policy Enforcement LSM (IPE)

by Deven Bowers

Overview: ------------------------------------ IPE is a Linux Security Module which allows for a configurable policy to enforce integrity requirements on the whole system. It attempts to solve the issue of Code Integrity: that any code being executed (or files being read), are identical to the version that was built by a trusted source. The type of system for which IPE is designed for use is an embedded device with a specific purpose (e.g. network firewall device in a data center), where all software and configuration is built and provisioned by the owner. Specifically, a system which leverages IPE is not intended for general purpose computing and does not utilize any software or configuration built by a third party. An ideal system to leverage IPE has both mutable and immutable components, however, all binary executable code is immutable. The scope of IPE is constrained to the OS. It is assumed that platform firmware verifies the kernel and optionally the root filesystem (e.g. via U-Boot verified boot). IPE then utilizes LSM hooks to enforce a flexible, kernel-resident integrity verification policy. IPE differs from other LSMs which provide integrity checking (for instance, IMA), as it has no dependency on the filesystem metadata itself. The attributes that IPE checks are deterministic properties that exist solely in the kernel. Additionally, IPE provides no additional mechanisms of verifying these files (e.g. IMA Signatures) - all of the attributes of verifying files are existing features within the kernel, such as dm-verity or fsverity. IPE provides a policy that allows owners of the system to easily specify integrity requirements and uses dm-verity signatures to simplify the authentication of allowed objects like authorized code and data. IPE supports two modes, permissive (similar to SELinux's permissive mode) and enforce. Permissive mode performs the same checks, and logs policy violations as enforce mode, but will not enforce the policy. 
This allows users to test policies before enforcing them. The default mode is enforce, and can be changed via the kernel commandline parameter `ipe.enforce=(0|1)`, or the securityfs node `/sys/kernel/security/ipe/enforce`. The ability to switch modes can be compiled out of the LSM via setting the config CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH to N. IPE additionally supports success auditing. When enabled, all events that pass IPE policy and are not blocked will emit an audit event. This is disabled by default, and can be enabled via the kernel commandline `ipe.success_audit=(0|1)` or the securityfs node `/sys/kernel/security/ipe/success_audit`. Policies can be staged at runtime through securityfs and activated through sysfs. Please see the Deploying Policies section of this cover letter for more information. The IPE LSM is compiled under CONFIG_SECURITY_IPE. Policy: ------------------------------------ IPE policy is designed to be both forward compatible and backwards compatible. There is one required line, at the top of the policy, indicating the policy name, and the policy version, for instance: policy_name="Ex Policy" policy_version=0.0.0 The policy version indicates the current version of the policy (NOT the policy syntax version). This is used to prevent roll-back of policy to potentially insecure previous versions of the policy. The next portion of IPE policy, are rules. Rules are formed by key=value pairs, known as properties. IPE rules require two properties: "action", which determines what IPE does when it encounters a match against the policy, and "op", which determines when that rule should be evaluated. Thus, a minimal rule is: op=EXECUTE action=ALLOW This example will allow any execution. Additional properties are used to restrict attributes about the files being evaluated. These properties are intended to be deterministic attributes that are resident in the kernel. 
Available properties for IPE are described in the properties section of this cover-letter, the repository available in Appendix A, and the kernel documentation page. Order does not matter for the rule's properties - they can be listed in any order, however it is encouraged to have the "op" property be first, and the "action" property be last, for readability. Additionally, rules are evaluated top-to-bottom. As a result, any revocation rules, or denies should be placed early in the file to ensure that these rules are evaluated before a rule with "action=ALLOW" is hit. Any unknown syntax in IPE policy will result in a fatal error to parse the policy. User mode can interrogate the kernel to understand what properties and the associated versions through the securityfs node, $securityfs/ipe/property_config, which will return a string of form: key1=version1 key2=version2 . . . keyN=versionN User-mode should correlate these versions with the supported values identified in the documentation to determine whether a policy should be accepted by the system. Additionally, a DEFAULT operation must be set for all understood operations within IPE. For policies to remain completely forwards compatible, it is recommended that users add a "DEFAULT action=ALLOW" and override the defaults on a per-operation basis. For more information about the policy syntax, please see Appendix A or the kernel documentation page. Early Usermode Protection: -------------------------- IPE can be provided with a policy at startup to load and enforce. This is intended to be a minimal policy to get the system to a state where userland is setup and ready to receive commands, at which point a policy can be deployed via securityfs. This "boot policy" can be specified via the config, SECURITY_IPE_BOOT_POLICY, which accepts a path to a plain-text version of the IPE policy to apply. This policy will be compiled into the kernel. 
If not specified, IPE will be disabled until a policy is deployed and activated through the method above. Policy Examples: ------------------------------------ Allow all: policy_name="Allow All" policy_version=0.0.0 DEFAULT action=ALLOW Allow only initial superblock: policy_name="Allow All Initial SB" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW Allow any signed dm-verity volume and the initial superblock: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Prohibit execution from a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_signature=TRUE action=ALLOW Allow only a specific dm-verity volume: policy_name="AllowSignedAndInitial" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE dmverity_roothash=401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW Deploying Policies: ------------------- Deploying policies is simple. First sign a plain text policy, with a certificate that is present in the SYSTEM_TRUSTED_KEYRING of your test machine. Through openssl, the signing can be done via: openssl smime -sign -in "$MY_POLICY" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr -nodetach \ -out "$MY_POLICY.p7s" Then, simply cat the file into the IPE's "new_policy" securityfs node: cat "$MY_POLICY.p7s" > /sys/kernel/security/ipe/new_policy The policy should now be present under the policies/ subdirectory, under its "policy_name" attribute. 
The policy is now present in the kernel and can be marked as active, via the sysctl "ipe.active_policy": echo -n 1 > "/sys/kernel/security/ipe/$MY_POLICY_NAME/active" This will now mark the policy as active and the system will be enforcing $MY_POLICY_NAME. At any point the policy can be updated on the provision that the policy version to be deployed is greater than or equal to the running version (to prevent roll-back attacks). This update can be done by redirecting the file into the policy's "raw" node, under the policies subdirectory: cat "$MY_UPDATED_POLICY.p7s" > \ "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/raw" Additionally, policies can be deleted via the "del_policy" securityfs node. Simply write the name of the policy to be deleted to that node: echo -n 1 > "/sys/kernel/security/ipe/policies/$MY_POLICY_NAME/delete" There are two requirements to delete policies: 1. The policy being deleted must not be the active policy. 2. The policy being deleted must not be the boot policy. It's important to know above that the "echo" command will add a newline to the end of the input, and this will be considered as part of the filename. You can remove the newline via the -n parameter. NOTE: If a MAC LSM is enabled, the securityfs commands will require CAP_MAC_ADMIN. This is due to sysfs supporting fine-grained MAC attributes, while securityfs at the current moment does not. Properties: ------------------------------------ This initial patchset introducing IPE adds three properties: 'boot_verified', 'dmverity_signature' and 'dmverity_roothash'. boot_verified (CONFIG_IPE_BOOT_PROP): This property can be utilized for authorization of the first super-block that is mounted on the system, where IPE attempts to evaluate a file. Typically this is used for systems with an initramfs or other initial disk, where this is unmounted before the system becomes available, and is not covered by any other property. 
The format of this property is: boot_verified=(TRUE|FALSE) WARNING: This property will trust any disk where the first IPE evaluation occurs. If you do not have a startup disk that is unpacked and unmounted (like initramfs), then it will automatically trust the root filesystem and potentially overauthorize the entire disk. dmverity_roothash (CONFIG_IPE_DM_VERITY_ROOTHASH): This property can be utilized for authorization or revocation of specific dmverity volumes, identified via root hash. It has a dependency on the DM_VERITY module. The format of this property is: dmverity_roothash=<HashHexDigest> dmverity_signature (CONFIG_IPE_DM_VERITY_SIGNATURE): This property can be utilized for authorization of all dm-verity volumes that have a signed roothash that chains to the system trusted keyring. It has a dependency on the DM_VERITY_VERIFY_ROOTHASH_SIG config. The format of this property is: dmverity_signature=(TRUE|FALSE) Testing: ------------------------------------ A test suite is available (Appendix B) for ease of use. For manual instructions: Enable IPE through the following Kconfigs: CONFIG_SECURITY_IPE=y CONFIG_SECURITY_IPE_BOOT_POLICY="../AllowAllInitialSB.pol" CONFIG_SECURITY_IPE_PERMISSIVE_SWITCH=y CONFIG_IPE_BOOT_PROP=y CONFIG_IPE_DM_VERITY_ROOTHASH=y CONFIG_IPE_DM_VERITY_SIGNATURE=y CONFIG_DM_VERITY=y CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="/path/to/my/cert/list.pem" Start a test system, that boots directly from the filesystem, without an initrd. I recommend testing in permissive mode until all tests pass, then switch to enforce to ensure behavior remains identical. boot_verified: If booted correctly, the filesystem mounted on / should be marked as boot_verified. Verify by turning on success auditing (sysctl ipe.success_audit=1), and run a binary. In the audit output, `prop_boot_verified` should be `TRUE`. 
To test denials, mount a temporary filesystem (mount -t tmpfs -o size=4M tmp tmp), and copy a binary (e.g. ls) to this new filesystem. Disable success auditing and attempt to run the file. The file should have an audit event, but be allowed to execute in permissive mode, and prop_boot_verified should be FALSE. dmverity_roothash: First, you must create a dm-verity volume. This can be done through squashfs-tools and veritysetup (provided by cryptsetup). Creating a squashfs volume: mksquashfs /path/to/directory/with/executable /path/to/output.squashfs Format the volume for use with dm-verity & save the root hash: output_rh=$(veritysetup format output.squashfs output.hashtree | \ tee verity_out.txt | awk "/Root hash/" | \ sed -E "s/Root hash:\s+//g") echo -n $output_rh > output.roothash Create two policies, filling in the appropriate fields below: Policy 1: policy_name="roothash-denial" policy_version=0.0.0 DEFAULT action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=DENY Policy 2: policy_name="roothash-allow" policy_version=0.0.0 DEFAULT action=ALLOW DEFAULT op=EXECUTE action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_roothash=$output_rh action=ALLOW Deploy each policy, then mark the first, "roothash-denial" as active, per the "Deploying Policies" section of this cover letter. Mount the dm-verity volume: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` mount /dev/mapper/unverified /my/mount/point Attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_roothash=$output_rh action=DENY To test the second policy, perform the same steps, but this time, enable success auditing before running the executable. The success audit event should be a match against this rule: op=EXECUTE dmverity_roothash=$output_rh action=ALLOW dmverity_signature: Follow the setup steps for dmverity_roothash.
Sign the roothash via: openssl smime -sign -in "output.roothash" -signer "$MY_CERTIFICATE" \ -inkey "$MY_PRIVATE_KEY" -binary -outform der -noattr \ -out "output.p7s" Create a policy: policy_name="verified" policy_version=0.0.0 DEFAULT action=DENY op=EXECUTE boot_verified=TRUE action=ALLOW op=EXECUTE dmverity_verified=TRUE action=ALLOW Deploy the policy, and mark as active, per the "Deploying Policies" section of this cover letter. Mount the dm-verity volume with verification: veritysetup open output.squashfs output.hashtree unverified \ `cat output.roothash` --root-hash-signature=output.p7s mount /dev/mapper/unverified /my/mount/point NOTE: The --root-hash-signature option was introduced in veritysetup 2.3.0 Turn on success auditing and attempt to execute a binary in the mount point, and it should emit an audit event for a match against the rule: op=EXECUTE dmverity_verified=TRUE action=ALLOW To test denials, mount the dm-verity volume the same way as the "dmverity_roothash" section, and attempt to execute a binary. Failure should occur. Documentation: ------------------------------------ Full documentation is available on github in IPE's master repository (Appendix A). This is intended to be an exhaustive source of documentation around IPE. Additionally, there is higher level documentation in the admin-guide. Technical diagrams are available here: http://microsoft.github.io/ipe/technical/diagrams/ Known Gaps: ------------------------------------ IPE has two known gaps: 1. IPE cannot verify the integrity of anonymous executable memory, such as the trampolines created by gcc closures and libffi, or JIT'd code. Unfortunately, as this is dynamically generated code, there is no way for IPE to detect that this code has not been tampered with in transition from where it was built, to where it is running. As a result, IPE is incapable of tackling this problem for dynamically generated code. 
However, there is a patch series being prepared that addresses this problem for libffi and gcc closures by implementing a safer kernel trampoline API. 2. IPE cannot verify the integrity of interpreted languages' programs when these scripts are invoked via `<interpreter> <file>`. This is because of the way interpreters execute these files: the scripts themselves are not evaluated as executable code through one of IPE's hooks. Interpreters can be enlightened to the usage of IPE by trying to mmap a file into executable memory (+X), after opening the file and responding to the error code appropriately. This also applies to included files, or high value files, such as configuration files of critical system components. This specific gap is planned on being addressed within IPE. For more information on how we plan to address this gap, please see the Future Development section, below. Future Development: ------------------------------------ Support for filtering signatures by specific certificates. In this case, our "dmverity_signature" (or a separate property) can be set to a specific certificate declared in IPE's policy, allowing for more controlled use-cases determined by a user's PKI structure. Support for integrity verification for general file reads. This addresses the script interpreter issue indicated in the "Known Gaps" section, as these script files are typically opened with O_RDONLY. We are evaluating whether to do this by comparing the original userland filepath passed into the open syscall, thereby allowing existing callers to take advantage without any code changes; the alternate design is to extend the new openat2(2) syscall, with a new flag, tentatively called "O_VERIFY". While the second option requires a code change for all the interpreters, frameworks and languages that wish to leverage it, it is a wholly cleaner implementation in the kernel.
For interpreters specifically, the O_MAYEXEC patch series published by Mickaël Salaün[1] is a similar implementation to the O_VERIFY idea described above. Onboarding IPE's test suite to KernelCI. Currently we are developing a test suite in the same vein as SELinux's test suite. Once development of the test suite is complete, and provided IPE is accepted, we intend to onboard this test suite onto KernelCI. Hardened resistance against roll-back attacks. Currently there exists a window of opportunity between user-mode setup and the user-policy being deployed, where a prior user-policy can be loaded, that is potentially insecure. However, with a kernel update, you can revise the boot policy's version to be the same version as the latest policy, closing this window. In the future, I would like to close this window of opportunity without a kernel update, using some persistent storage mechanism. Open Issues: ------------ For linux-audit/integrity folks: 1. Introduction of new audit definitions in the kernel integrity range - is this preferred, as opposed to reusing definitions with existing IMA definitions? TODOs: ------ linux-audit changes to support the new audit events. Appendix: ------------------------------------ A. IPE Github Repository: https://github.com/microsoft/ipe Hosted Documentation: https://microsoft.github.io/ipe B. IPE Users' Guide: Documentation/admin-guide/LSM/ipe.rst C. IPE Test Suite: *TBA* (under development) References: ------------------------------------ 1. https://lore.kernel.org/linux-integrity/20200505153156.925111-1-mic@digik... Changelog: ------------------------------------ v1: Introduced v2: Split the second patch of the previous series into two. Minor corrections in the cover-letter and documentation comments regarding CAP_MAC_ADMIN checks in IPE. v3: Address various comments by Jann Horn. Highlights: Switch various audit allocators to GFP_KERNEL. Utilize rcu_access_pointer() in various locations. 
Strip out the caching system for properties Strip comments from headers Move functions around in patches Remove kernel command line parameters Reconcile the race condition on the delete node for policy by expanding the policy critical section. Address a few comments by Jonathan Corbet around the documentation pages for IPE. Fix an issue with the initialization of IPE policy with a "-0" version, caused by not initializing the hlist entries before freeing. v4: Address a concern around IPE's behavior with unknown syntax. Specifically, make any unknown syntax a fatal error instead of a warning, as suggested by Mickaël Salaün. Introduce a new securityfs node, $securityfs/ipe/property_config, which provides a listing of what properties are enabled by the kernel and their versions. This allows usermode to predict what policies should be allowed. Strip some comments from c files that I missed. Clarify some documentation comments around 'boot_verified'. While this currently does not functionally change the property itself, the distinction is important when IPE can enforce verified reads. Additionally, 'KERNEL_READ' was omitted from the documentation. This has been corrected. Change SecurityFS and SHA1 to a reverse dependency. Update the cover-letter with the updated behavior of unknown syntax. Remove all sysctls, making an equivalent function in securityfs. Rework the active/delete mechanism to be a node under the policy in $securityfs/ipe/policies. The kernel command line parameters ipe.enforce and ipe.success_audit have returned as this functionality is no longer exposed through sysfs. v5: Correct some grammatical errors reported by Randy Dunlap. Fix some warnings reported by kernel test bot. Change convention around security_bdev_setsecurity. -ENOSYS is now expected if an LSM does not implement a particular @name, as suggested by Casey Schaufler. 
Minor string corrections related to the move from sysfs to securityfs Correct a spelling of an #ifdef for the permissive argument. Add the kernel parameters re-added to the documentation. Fix a minor bug where the mode being audited on permissive switch was the original mode, not the mode being swapped to. Cleanup doc comments, fix some whitespace alignment issues. v6: Change if statement condition in security_bdev_setsecurity to be more concise, as suggested by Casey Schaufler and Al Viro Drop the 6th patch in the series, "dm-verity move signature check..." due to numerous issues, and it ultimately providing no real value. Fix the patch tree - the previous iteration appears to have been in a torn state (patches 8+9 were merged). This has since been corrected. Deven Bowers (11): scripts: add ipe tooling to generate boot policy security: add ipe lsm evaluation loop and audit system security: add ipe lsm policy parser and policy loading ipe: add property for trust of boot volume fs: add security blob and hooks for block_device dm-verity: add bdev_setsecurity hook for dm-verity signature ipe: add property for signed dmverity volumes dm-verity: add bdev_setsecurity hook for root-hash ipe: add property for dmverity roothash documentation: add ipe documentation cleanup: uapi/linux/audit.h Documentation/admin-guide/LSM/index.rst | 1 + Documentation/admin-guide/LSM/ipe.rst | 508 +++++++ .../admin-guide/kernel-parameters.txt | 12 + MAINTAINERS | 8 + drivers/md/dm-verity-target.c | 10 +- drivers/md/dm-verity-verify-sig.c | 14 +- drivers/md/dm-verity-verify-sig.h | 10 +- fs/block_dev.c | 8 + include/linux/device-mapper.h | 3 + include/linux/fs.h | 1 + include/linux/lsm_hook_defs.h | 5 + include/linux/lsm_hooks.h | 12 + include/linux/security.h | 22 + include/uapi/linux/audit.h | 36 +- scripts/Makefile | 1 + scripts/ipe/Makefile | 2 + scripts/ipe/polgen/.gitignore | 1 + scripts/ipe/polgen/Makefile | 7 + scripts/ipe/polgen/polgen.c | 136 ++ security/Kconfig | 12 +- 
security/Makefile | 2 + security/ipe/.gitignore | 2 + security/ipe/Kconfig | 48 + security/ipe/Makefile | 33 + security/ipe/ipe-audit.c | 303 ++++ security/ipe/ipe-audit.h | 24 + security/ipe/ipe-blobs.c | 95 ++ security/ipe/ipe-blobs.h | 18 + security/ipe/ipe-engine.c | 213 +++ security/ipe/ipe-engine.h | 49 + security/ipe/ipe-hooks.c | 169 +++ security/ipe/ipe-hooks.h | 70 + security/ipe/ipe-parse.c | 889 +++++++++++ security/ipe/ipe-parse.h | 17 + security/ipe/ipe-pin.c | 93 ++ security/ipe/ipe-pin.h | 36 + security/ipe/ipe-policy.c | 149 ++ security/ipe/ipe-policy.h | 69 + security/ipe/ipe-prop-internal.h | 49 + security/ipe/ipe-property.c | 143 ++ security/ipe/ipe-property.h | 100 ++ security/ipe/ipe-secfs.c | 1309 +++++++++++++++++ security/ipe/ipe-secfs.h | 14 + security/ipe/ipe.c | 115 ++ security/ipe/ipe.h | 22 + security/ipe/properties/Kconfig | 36 + security/ipe/properties/Makefile | 13 + security/ipe/properties/boot-verified.c | 82 ++ security/ipe/properties/dmverity-roothash.c | 153 ++ security/ipe/properties/dmverity-signature.c | 82 ++ security/ipe/properties/prop-entry.h | 38 + security/ipe/utility.h | 32 + security/security.c | 70 + 53 files changed, 5316 insertions(+), 30 deletions(-) create mode 100644 Documentation/admin-guide/LSM/ipe.rst create mode 100644 scripts/ipe/Makefile create mode 100644 scripts/ipe/polgen/.gitignore create mode 100644 scripts/ipe/polgen/Makefile create mode 100644 scripts/ipe/polgen/polgen.c create mode 100644 security/ipe/.gitignore create mode 100644 security/ipe/Kconfig create mode 100644 security/ipe/Makefile create mode 100644 security/ipe/ipe-audit.c create mode 100644 security/ipe/ipe-audit.h create mode 100644 security/ipe/ipe-blobs.c create mode 100644 security/ipe/ipe-blobs.h create mode 100644 security/ipe/ipe-engine.c create mode 100644 security/ipe/ipe-engine.h create mode 100644 security/ipe/ipe-hooks.c create mode 100644 security/ipe/ipe-hooks.h create mode 100644 security/ipe/ipe-parse.c create mode 
100644 security/ipe/ipe-parse.h create mode 100644 security/ipe/ipe-pin.c create mode 100644 security/ipe/ipe-pin.h create mode 100644 security/ipe/ipe-policy.c create mode 100644 security/ipe/ipe-policy.h create mode 100644 security/ipe/ipe-prop-internal.h create mode 100644 security/ipe/ipe-property.c create mode 100644 security/ipe/ipe-property.h create mode 100644 security/ipe/ipe-secfs.c create mode 100644 security/ipe/ipe-secfs.h create mode 100644 security/ipe/ipe.c create mode 100644 security/ipe/ipe.h create mode 100644 security/ipe/properties/Kconfig create mode 100644 security/ipe/properties/Makefile create mode 100644 security/ipe/properties/boot-verified.c create mode 100644 security/ipe/properties/dmverity-roothash.c create mode 100644 security/ipe/properties/dmverity-signature.c create mode 100644 security/ipe/properties/prop-entry.h create mode 100644 security/ipe/utility.h -- 2.27.0

4 years, 11 months

1
11
0 / 0

[GIT PULL] Audit fixes for v5.8 (#1)

by Paul Moore

Hi Linus, One small audit fix that you can hopefully merge before v5.8 is released. Unfortunately it is a revert of a patch that went in during the v5.7 window and we just recently started to see some bug reports relating to that commit. We are working on a proper fix, but I'm not yet clear on when that will be ready and we need to fix the v5.7 kernels anyway, so in the interest of time a revert seemed like the best solution right now. The patch passes our test suite, and as of right now it merges cleanly against your tree. You may notice a force-push on the audit/stable-5.8 branch, but that was to fix a spelling mistake in the commit that was identified after the patch had been committed. Generally I try to avoid force-pushes, but since no one really uses the audit/stable-X.Y branches as a base for development it seemed safe. Thanks, -Paul -- The following changes since commit 9d44a121c5a79bc8a9d67c058456bd52a83c79e7: audit: add subj creds to NETFILTER_CFG record to (2020-05-20 18:09:19 -0400) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git tags/audit-pr-20200729 for you to fetch changes up to 8ac68dc455d9d18241d44b96800d73229029ed34: revert: 1320a4052ea1 ("audit: trigger accompanying records when no rules present") (2020-07-29 10:00:36 -0400) ---------------------------------------------------------------- audit/stable-5.8 PR 20200729 ---------------------------------------------------------------- Paul Moore (1): revert: 1320a4052ea1 ("audit: trigger accompanying records when no rules present") kernel/audit.c | 1 - kernel/audit.h | 8 -------- kernel/auditsc.c | 3 +++ 3 files changed, 3 insertions(+), 9 deletions(-) -- paul moore www.paul-moore.com

4 years, 11 months

2
1
0 / 0

[PATCH V3fix ghak120] audit: initialize context values in case of mandatory events

by Richard Guy Briggs

Issue ghak120 enabled syscall records to accompany required records when no rules are present to trigger the storage of syscall context. A reported issue showed that the cwd was not always initialized. That issue was already resolved, but a review of all other records that could be triggered at the time of a syscall record revealed other potential values that could be missing or misleading. Initialize them. The fds array is reset to -1 after the first syscall to indicate it isn't valid any more, but was never set to -1 when the context was allocated to indicate it wasn't yet valid. The audit_inode* functions can be called without going through getname_flags() or getname_kernel() that sets audit_names and cwd, so set the cwd if it has not already been done so due to audit_names being valid. The LSM dump_common_audit_data() LSM_AUDIT_DATA_NET:AF_UNIX case was missed with the ghak96 patch, so add that case here. Please see issue https://github.com/linux-audit/audit-kernel/issues/120 Please see issue https://github.com/linux-audit/audit-kernel/issues/96 Passes audit-testsuite. Signed-off-by: Richard Guy Briggs <rgb(a)redhat.com> --- kernel/auditsc.c | 3 +++ security/lsm_audit.c | 1 + 2 files changed, 4 insertions(+) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 6884b50069d1..2f97618e6a34 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -929,6 +929,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state) context->prio = state == AUDIT_RECORD_CONTEXT ? 
~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); INIT_LIST_HEAD(&context->names_list); + context->fds[0] = -1; return context; } @@ -2076,6 +2077,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, } handle_path(dentry); audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL); + _audit_getcwd(context); } void __audit_file(const struct file *file) @@ -2194,6 +2196,7 @@ void __audit_inode_child(struct inode *parent, audit_copy_inode(found_child, dentry, inode, 0); else found_child->ino = AUDIT_INO_UNSET; + _audit_getcwd(context); } EXPORT_SYMBOL_GPL(__audit_inode_child); diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 53d0d183db8f..e93077612246 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -369,6 +369,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_untrustedstring(ab, p); else audit_log_n_hex(ab, p, len); + audit_getcwd(); break; } } -- 1.8.3.1

4 years, 11 months

2
5
0 / 0

[PATCH] revert: 1320a4052ea1 ("audit: trigger accompanying records when no rules present")

by Paul Moore

Unfortunately the commit listed in the subject line above failed to ensure that the task's audit_context was properly initialized/set before enabling the "accompanying records". Depending on the situation, the resulting audit_context could have invalid values in some of its fields which could cause a kernel panic/oops when the task/syscall exits and the audit records are generated. We will revisit the original patch, with the necessary fixes, in a future kernel but right now we just want to fix the kernel panic with the least amount of added risk. Cc: stable(a)vger.kernel.org Fixes: 1320a4052ea1 ("audit: trigger accompanying records when no rules present") Reported-by: j2468h(a)googlemail.com Signed-off-by: Paul Moore <paul(a)paul-moore.com> --- kernel/audit.c | 1 - kernel/audit.h | 8 -------- kernel/auditsc.c | 3 +++ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index e33460e01b3b..9bf2b08b051f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1848,7 +1848,6 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); - audit_clear_dummy(ab->ctx); audit_log_format(ab, "audit(%llu.%03lu:%u): ", (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial); diff --git a/kernel/audit.h b/kernel/audit.h index f0233dc40b17..ddc22878433d 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -290,13 +290,6 @@ extern int audit_signal_info_syscall(struct task_struct *t); extern void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx); extern struct list_head *audit_killed_trees(void); - -static inline void audit_clear_dummy(struct audit_context *ctx) -{ - if (ctx) - ctx->dummy = 0; -} - #else /* CONFIG_AUDITSYSCALL */ #define auditsc_get_stamp(c, t, s) 0 #define audit_put_watch(w) {} @@ -330,7 +323,6 @@ static inline int audit_signal_info_syscall(struct task_struct *t) } #define audit_filter_inodes(t, c) AUDIT_DISABLED -#define
audit_clear_dummy(c) {} #endif /* CONFIG_AUDITSYSCALL */ extern char *audit_unpack_string(void **bufp, size_t *remain, size_t len); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 468a23390457..fd840c40abf7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1417,6 +1417,9 @@ static void audit_log_proctitle(void) struct audit_context *context = audit_context(); struct audit_buffer *ab; + if (!context || context->dummy) + return; + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE); if (!ab) return; /* audit_panic or being filtered */

4 years, 11 months

1
1
0 / 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

Linux-audit July 2020