Contributors: 6
Author Tokens Token Proportion Commits Commit Proportion
Stanislav Fomichev 769 92.43% 7 50.00%
YiFei Zhu 28 3.37% 2 14.29%
Andrii Nakryiko 23 2.76% 2 14.29%
Yauheni Kaliuta 9 1.08% 1 7.14%
Felix Maurer 2 0.24% 1 7.14%
Toke Höiland-Jörgensen 1 0.12% 1 7.14%
Total 832 14


// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include <bpf/bpf_helpers.h>

/* License string emitted into the "license" section of the object. */
char _license[] SEC("license") = "GPL";

/* Page size used by both programs below to validate the visible optval
 * window and the original optlen. It stays 0 unless the loader overrides it.
 */
int page_size = 0; /* userspace should set it */

/* Fall back to the protocol number when the headers do not provide SOL_TCP. */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Made-up socket option level that only the BPF programs in this file
 * handle; every level other than the explicitly listed ones is denied.
 */
#define SOL_CUSTOM			0xdeadbeef

/* Per-socket state kept in socket_storage_map: the single byte stored by
 * _setsockopt() for SOL_CUSTOM and reported back by _getsockopt().
 */
struct sockopt_sk {
	__u8 val; /* last byte written through SOL_CUSTOM */
};

/* Socket-local storage map holding one struct sockopt_sk per socket.
 * Both programs access it through bpf_sk_storage_get() with
 * BPF_SK_STORAGE_GET_F_CREATE.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");

/* cgroup/getsockopt BPF program.
 *
 * Returns 1 to let the call through (possibly after rewriting optval,
 * optlen and retval via ctx) and 0 to deny it. Cases handled below:
 *  - AF_NETLINK sockets, SOL_IP:IP_TOS, SOL_SOCKET:SO_SNDBUF and
 *    SOL_TCP:TCP_CONGESTION leave through the common `out` path;
 *  - SOL_TCP:TCP_ZEROCOPY_RECEIVE also leaves through `out`, after
 *    checking that the buffer starts with a zero address;
 *  - SOL_IP:IP_FREEBIND always reports the single byte 0x55;
 *  - SOL_CUSTOM reports the byte kept in socket_storage_map;
 *  - every other level is denied.
 */
SEC("cgroup/getsockopt")
int _getsockopt(struct bpf_sockopt *ctx)
{
	/* Snapshot of the buffer window that this program may access. */
	__u8 *optval_end = ctx->optval_end;
	__u8 *optval = ctx->optval;
	struct sockopt_sk *storage;
	struct bpf_sock *sk;

	/* Bypass AF_NETLINK. */
	sk = ctx->sk;
	if (sk && sk->family == AF_NETLINK)
		goto out;

	/* Make sure bpf_get_netns_cookie is callable, both with a NULL
	 * argument and with ctx. A zero cookie from either call denies
	 * the request.
	 */
	if (bpf_get_netns_cookie(NULL) == 0)
		return 0;

	if (bpf_get_netns_cookie(ctx) == 0)
		return 0;

	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
		/* Not interested in SOL_IP:IP_TOS;
		 * let next BPF program in the cgroup chain or kernel
		 * handle it.
		 */
		goto out;
	}

	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
		/* Not interested in SOL_SOCKET:SO_SNDBUF;
		 * let next BPF program in the cgroup chain or kernel
		 * handle it.
		 */
		goto out;
	}

	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
		/* Not interested in SOL_TCP:TCP_CONGESTION;
		 * let next BPF program in the cgroup chain or kernel
		 * handle it.
		 */
		goto out;
	}

	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
		 * It has a custom implementation for performance
		 * reasons.
		 */

		/* Check that optval contains address (__u64) */
		if (optval + sizeof(__u64) > optval_end)
			return 0; /* bounds check */

		/* The address field is expected to be zero here. */
		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
			return 0; /* unexpected data */

		goto out;
	}

	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
		/* One byte is written below, so one byte must be accessible. */
		if (optval + 1 > optval_end)
			return 0; /* bounds check */

		ctx->retval = 0; /* Reset system call return value to zero */

		/* Always export 0x55 */
		optval[0] = 0x55;
		ctx->optlen = 1;

		/* Userspace buffer is PAGE_SIZE * 2, but BPF
		 * program can only see the first PAGE_SIZE
		 * bytes of data.
		 */
		if (optval_end - optval != page_size)
			return 0; /* unexpected data size */

		return 1;
	}

	if (ctx->level != SOL_CUSTOM)
		return 0; /* deny everything except custom level */

	/* SOL_CUSTOM: a single byte is reported back to the caller. */
	if (optval + 1 > optval_end)
		return 0; /* bounds check */

	/* Look up this socket's storage, creating it if it does not exist. */
	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
				     BPF_SK_STORAGE_GET_F_CREATE);
	if (!storage)
		return 0; /* couldn't get sk storage */

	/* A zero retval would mean the kernel accepted the option. */
	if (!ctx->retval)
		return 0; /* kernel should not have handled
			   * SOL_CUSTOM, something is wrong!
			   */
	ctx->retval = 0; /* Reset system call return value to zero */

	/* Report the byte previously saved for this socket. */
	optval[0] = storage->val;
	ctx->optlen = 1;

	return 1;

out:
	/* optval larger than PAGE_SIZE use kernel's buffer. */
	if (ctx->optlen > page_size)
		ctx->optlen = 0;
	return 1;
}

/* cgroup/setsockopt BPF program.
 *
 * Returns 1 to let the call through (possibly after rewriting optval and
 * optlen via ctx) and 0 to deny it. Cases handled below:
 *  - AF_NETLINK sockets leave through the common `out` path;
 *  - SOL_IP:IP_TOS is left to the next program or the kernel, with
 *    optlen reset to 0;
 *  - SOL_SOCKET:SO_SNDBUF has its value overwritten with 0x55AA;
 *  - SOL_TCP:TCP_CONGESTION has its value overwritten with "cubic";
 *  - SOL_IP:IP_FREEBIND is trimmed to a single zero byte after the
 *    size checks;
 *  - SOL_CUSTOM saves the first byte in socket_storage_map and sets
 *    optlen to -1 so the kernel handler is not called;
 *  - every other level is denied.
 */
SEC("cgroup/setsockopt")
int _setsockopt(struct bpf_sockopt *ctx)
{
	/* Snapshot of the buffer window that this program may access. */
	__u8 *optval_end = ctx->optval_end;
	__u8 *optval = ctx->optval;
	struct sockopt_sk *storage;
	struct bpf_sock *sk;

	/* Bypass AF_NETLINK. */
	sk = ctx->sk;
	if (sk && sk->family == AF_NETLINK)
		goto out;

	/* Make sure bpf_get_netns_cookie is callable, both with a NULL
	 * argument and with ctx. A zero cookie from either call denies
	 * the request.
	 */
	if (bpf_get_netns_cookie(NULL) == 0)
		return 0;

	if (bpf_get_netns_cookie(ctx) == 0)
		return 0;

	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
		/* Not interested in SOL_IP:IP_TOS;
		 * let next BPF program in the cgroup chain or kernel
		 * handle it.
		 */
		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
		return 1;
	}

	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
		/* Overwrite SO_SNDBUF value */

		if (optval + sizeof(__u32) > optval_end)
			return 0; /* bounds check */

		*(__u32 *)optval = 0x55AA;
		ctx->optlen = 4;

		return 1;
	}

	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
		/* Always use cubic */

		if (optval + 5 > optval_end)
			return 0; /* bounds check */

		/* Five bytes are copied; no NUL terminator is written. */
		memcpy(optval, "cubic", 5);
		ctx->optlen = 5;

		return 1;
	}

	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
		/* Original optlen is larger than PAGE_SIZE. */
		if (ctx->optlen != page_size * 2)
			return 0; /* unexpected data size */

		if (optval + 1 > optval_end)
			return 0; /* bounds check */

		/* Make sure we can trim the buffer. */
		optval[0] = 0;
		ctx->optlen = 1;

		/* Userspace buffer is PAGE_SIZE * 2, but BPF
		 * program can only see the first PAGE_SIZE
		 * bytes of data.
		 */
		if (optval_end - optval != page_size)
			return 0; /* unexpected data size */

		return 1;
	}

	if (ctx->level != SOL_CUSTOM)
		return 0; /* deny everything except custom level */

	/* SOL_CUSTOM: a single byte is consumed from the caller's buffer. */
	if (optval + 1 > optval_end)
		return 0; /* bounds check */

	/* Look up this socket's storage, creating it if it does not exist. */
	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
				     BPF_SK_STORAGE_GET_F_CREATE);
	if (!storage)
		return 0; /* couldn't get sk storage */

	/* Remember the byte so _getsockopt() can report it later. */
	storage->val = optval[0];
	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
			   * setsockopt handler.
			   */

	return 1;

out:
	/* optval larger than PAGE_SIZE use kernel's buffer. */
	if (ctx->optlen > page_size)
		ctx->optlen = 0;
	return 1;
}