Contributors: 7
Author Tokens Token Proportion Commits Commit Proportion
Andrii Nakryiko 2910 96.52% 7 46.67%
Jiri Olsa 59 1.96% 1 6.67%
Song Liu 15 0.50% 1 6.67%
Eduard Zingerman 15 0.50% 1 6.67%
Wang Nan 8 0.27% 3 20.00%
Joe Stringer 7 0.23% 1 6.67%
Alexei Starovoitov 1 0.03% 1 6.67%
Total 3015 15


// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include <linux/kernel.h>
#include <linux/filter.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_common.h"
#include "libbpf_internal.h"
#include "str_error.h"

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64)(unsigned long)ptr;
}

int probe_fd(int fd)
{
	if (fd >= 0)
		close(fd);
	return fd >= 0;
}

static int probe_kern_prog_name(int token_fd)
{
	const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, attr_sz);
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.license = ptr_to_u64("GPL");
	attr.insns = ptr_to_u64(insns);
	attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
	attr.prog_token_fd = token_fd;
	if (token_fd)
		attr.prog_flags |= BPF_F_TOKEN_FD;
	libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));

	/* make sure loading with name works */
	ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
	return probe_fd(ret);
}

static int probe_kern_global_data(int token_fd)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	struct bpf_insn insns[] = {
		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		.token_fd = token_fd,
		.map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	int ret, map, insn_cnt = ARRAY_SIZE(insns);

	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
	if (map < 0) {
		ret = -errno;
		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
			__func__, cp, -ret);
		return ret;
	}

	insns[0].imm = map;

	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
	close(map);
	return probe_fd(ret);
}

static int probe_kern_btf(int token_fd)
{
	static const char strs[] = "\0int";
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_func(int token_fd)
{
	static const char strs[] = "\0int\0x\0a";
	/* void x(int a) {} */
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
		/* FUNC_PROTO */                                /* [2] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
		BTF_PARAM_ENC(7, 1),
		/* FUNC x */                                    /* [3] */
		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_func_global(int token_fd)
{
	static const char strs[] = "\0int\0x\0a";
	/* static void x(int a) {} */
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
		/* FUNC_PROTO */                                /* [2] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
		BTF_PARAM_ENC(7, 1),
		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_datasec(int token_fd)
{
	static const char strs[] = "\0x\0.data";
	/* static int a; */
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
		/* VAR x */                                     /* [2] */
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
		BTF_VAR_STATIC,
		/* DATASEC val */                               /* [3] */
		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
		BTF_VAR_SECINFO_ENC(2, 0, 4),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_qmark_datasec(int token_fd)
{
	static const char strs[] = "\0x\0?.data";
	/* static int a; */
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
		/* VAR x */                                     /* [2] */
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
		BTF_VAR_STATIC,
		/* DATASEC ?.data */                            /* [3] */
		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
		BTF_VAR_SECINFO_ENC(2, 0, 4),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_float(int token_fd)
{
	static const char strs[] = "\0float";
	__u32 types[] = {
		/* float */
		BTF_TYPE_FLOAT_ENC(1, 4),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_decl_tag(int token_fd)
{
	static const char strs[] = "\0tag";
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
		/* VAR x */                                     /* [2] */
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
		BTF_VAR_STATIC,
		/* attr */
		BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_btf_type_tag(int token_fd)
{
	static const char strs[] = "\0tag";
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
		/* attr */
		BTF_TYPE_TYPE_TAG_ENC(1, 1),				/* [2] */
		/* ptr */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),	/* [3] */
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_array_mmap(int token_fd)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0),
		.token_fd = token_fd,
	);
	int fd;

	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
	return probe_fd(fd);
}

static int probe_kern_exp_attach_type(int token_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int fd, insn_cnt = ARRAY_SIZE(insns);

	/* use any valid combination of program type and (optional)
	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
	 * to see if kernel supports expected_attach_type field for
	 * BPF_PROG_LOAD command
	 */
	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
	return probe_fd(fd);
}

static int probe_kern_probe_read_kernel(int token_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	struct bpf_insn insns[] = {
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */
		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */
		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
		BPF_EXIT_INSN(),
	};
	int fd, insn_cnt = ARRAY_SIZE(insns);

	fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
	return probe_fd(fd);
}

static int probe_prog_bind_map(int token_fd)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		.token_fd = token_fd,
		.map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);

	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
	if (map < 0) {
		ret = -errno;
		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
			__func__, cp, -ret);
		return ret;
	}

	prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
	if (prog < 0) {
		close(map);
		return 0;
	}

	ret = bpf_prog_bind_map(prog, map, NULL);

	close(map);
	close(prog);

	return ret >= 0;
}

static int probe_module_btf(int token_fd)
{
	static const char strs[] = "\0int";
	__u32 types[] = {
		/* int */
		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
	};
	struct bpf_btf_info info;
	__u32 len = sizeof(info);
	char name[16];
	int fd, err;

	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
	if (fd < 0)
		return 0; /* BTF not supported at all */

	memset(&info, 0, sizeof(info));
	info.name = ptr_to_u64(name);
	info.name_len = sizeof(name);

	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
	 * kernel's module BTF support coincides with support for
	 * name/name_len fields in struct bpf_btf_info.
	 */
	err = bpf_btf_get_info_by_fd(fd, &info, &len);
	close(fd);
	return !err;
}

static int probe_perf_link(int token_fd)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	int prog_fd, link_fd, err;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &opts);
	if (prog_fd < 0)
		return -errno;

	/* use invalid perf_event FD to get EBADF, if link is supported;
	 * otherwise EINVAL should be returned
	 */
	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
	err = -errno; /* close() can clobber errno */

	if (link_fd >= 0)
		close(link_fd);
	close(prog_fd);

	return link_fd < 0 && err == -EBADF;
}

static int probe_uprobe_multi_link(int token_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
		.expected_attach_type = BPF_TRACE_UPROBE_MULTI,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int prog_fd, link_fd, err;
	unsigned long offset = 0;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &load_opts);
	if (prog_fd < 0)
		return -errno;

	/* Creating uprobe in '/' binary should fail with -EBADF. */
	link_opts.uprobe_multi.path = "/";
	link_opts.uprobe_multi.offsets = &offset;
	link_opts.uprobe_multi.cnt = 1;

	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
	err = -errno; /* close() can clobber errno */

	if (link_fd >= 0)
		close(link_fd);
	close(prog_fd);

	return link_fd < 0 && err == -EBADF;
}

static int probe_kern_bpf_cookie(int token_fd)
{
	struct bpf_insn insns[] = {
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	int ret, insn_cnt = ARRAY_SIZE(insns);

	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
	return probe_fd(ret);
}

static int probe_kern_btf_enum64(int token_fd)
{
	static const char strs[] = "\0enum64";
	__u32 types[] = {
		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs), token_fd));
}

static int probe_kern_arg_ctx_tag(int token_fd)
{
	static const char strs[] = "\0a\0b\0arg:ctx\0";
	const __u32 types[] = {
		/* [1] INT */
		BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] PTR -> VOID */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
		/* [3] FUNC_PROTO `int(void *a)` */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
		BTF_PARAM_ENC(1 /* "a" */, 2),
		/* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
		BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
		/* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
		BTF_PARAM_ENC(3 /* "b" */, 2),
		/* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
		BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
		/* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
		BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
	};
	const struct bpf_insn insns[] = {
		/* main prog */
		BPF_CALL_REL(+1),
		BPF_EXIT_INSN(),
		/* global subprog */
		BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
		BPF_EXIT_INSN(),
	};
	const struct bpf_func_info_min func_infos[] = {
		{ 0, 4 }, /* main prog -> FUNC 'a' */
		{ 2, 6 }, /* subprog -> FUNC 'b' */
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.token_fd = token_fd,
		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
	);
	int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);

	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
	if (btf_fd < 0)
		return 0;

	opts.prog_btf_fd = btf_fd;
	opts.func_info = &func_infos;
	opts.func_info_cnt = ARRAY_SIZE(func_infos);
	opts.func_info_rec_size = sizeof(func_infos[0]);

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
				"GPL", insns, insn_cnt, &opts);
	close(btf_fd);

	return probe_fd(prog_fd);
}

typedef int (*feature_probe_fn)(int /* token_fd */);

static struct kern_feature_cache feature_cache;

static struct kern_feature_desc {
	const char *desc;
	feature_probe_fn probe;
} feature_probes[__FEAT_CNT] = {
	[FEAT_PROG_NAME] = {
		"BPF program name", probe_kern_prog_name,
	},
	[FEAT_GLOBAL_DATA] = {
		"global variables", probe_kern_global_data,
	},
	[FEAT_BTF] = {
		"minimal BTF", probe_kern_btf,
	},
	[FEAT_BTF_FUNC] = {
		"BTF functions", probe_kern_btf_func,
	},
	[FEAT_BTF_GLOBAL_FUNC] = {
		"BTF global function", probe_kern_btf_func_global,
	},
	[FEAT_BTF_DATASEC] = {
		"BTF data section and variable", probe_kern_btf_datasec,
	},
	[FEAT_ARRAY_MMAP] = {
		"ARRAY map mmap()", probe_kern_array_mmap,
	},
	[FEAT_EXP_ATTACH_TYPE] = {
		"BPF_PROG_LOAD expected_attach_type attribute",
		probe_kern_exp_attach_type,
	},
	[FEAT_PROBE_READ_KERN] = {
		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
	},
	[FEAT_PROG_BIND_MAP] = {
		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
	},
	[FEAT_MODULE_BTF] = {
		"module BTF support", probe_module_btf,
	},
	[FEAT_BTF_FLOAT] = {
		"BTF_KIND_FLOAT support", probe_kern_btf_float,
	},
	[FEAT_PERF_LINK] = {
		"BPF perf link support", probe_perf_link,
	},
	[FEAT_BTF_DECL_TAG] = {
		"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
	},
	[FEAT_BTF_TYPE_TAG] = {
		"BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
	},
	[FEAT_MEMCG_ACCOUNT] = {
		"memcg-based memory accounting", probe_memcg_account,
	},
	[FEAT_BPF_COOKIE] = {
		"BPF cookie support", probe_kern_bpf_cookie,
	},
	[FEAT_BTF_ENUM64] = {
		"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
	},
	[FEAT_SYSCALL_WRAPPER] = {
		"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
	},
	[FEAT_UPROBE_MULTI_LINK] = {
		"BPF multi-uprobe link support", probe_uprobe_multi_link,
	},
	[FEAT_ARG_CTX_TAG] = {
		"kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag,
	},
	[FEAT_BTF_QMARK_DATASEC] = {
		"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
	},
};

bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
{
	struct kern_feature_desc *feat = &feature_probes[feat_id];
	int ret;

	/* assume global feature cache, unless custom one is provided */
	if (!cache)
		cache = &feature_cache;

	if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
		ret = feat->probe(cache->token_fd);
		if (ret > 0) {
			WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
		} else if (ret == 0) {
			WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
		} else {
			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
			WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
		}
	}

	return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
}