Contributors: 4
Author Tokens Token Proportion Commits Commit Proportion
Christina Jacob 3940 99.72% 1 25.00%
Eric Leblond 5 0.13% 1 25.00%
Dan Carpenter 5 0.13% 1 25.00%
Jakub Kiciński 1 0.03% 1 25.00%
Total 3951 4


/* Copyright (C) 2017 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */
#include <linux/bpf.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include "bpf_load.h"
#include <bpf/bpf.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <poll.h>
#include <net/if.h>
#include <netdb.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include "bpf_util.h"

int sock, sock_arp, flags = 0;
static int total_ifindex;
int *ifindex_list;
char buf[8192];

static int get_route_table(int rtm_family);
static void int_exit(int sig)
{
	int i = 0;

	for (i = 0; i < total_ifindex; i++)
		bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
	exit(0);
}

static void close_and_exit(int sig)
{
	int i = 0;

	close(sock);
	close(sock_arp);

	for (i = 0; i < total_ifindex; i++)
		bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
	exit(0);
}

/* Get the mac address of the interface given interface name */
static __be64 getmac(char *iface)
{
	struct ifreq ifr;
	__be64 mac = 0;
	int fd, i;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	ifr.ifr_addr.sa_family = AF_INET;
	strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
		printf("ioctl failed leaving....\n");
		return -1;
	}
	for (i = 0; i < 6 ; i++)
		*((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
	close(fd);
	return mac;
}

static int recv_msg(struct sockaddr_nl sock_addr, int sock)
{
	struct nlmsghdr *nh;
	int len, nll = 0;
	char *buf_ptr;

	buf_ptr = buf;
	while (1) {
		len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
		if (len < 0)
			return len;

		nh = (struct nlmsghdr *)buf_ptr;

		if (nh->nlmsg_type == NLMSG_DONE)
			break;
		buf_ptr += len;
		nll += len;
		if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
			break;

		if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
			break;
	}
	return nll;
}

/* Function to parse the route entry returned by netlink
 * Updates the route entry related map entries
 */
static void read_route(struct nlmsghdr *nh, int nll)
{
	char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
	struct bpf_lpm_trie_key *prefix_key;
	struct rtattr *rt_attr;
	struct rtmsg *rt_msg;
	int rtm_family;
	int rtl;
	int i;
	struct route_table {
		int  dst_len, iface, metric;
		char *iface_name;
		__be32 dst, gw;
		__be64 mac;
	} route;
	struct arp_table {
		__be64 mac;
		__be32 dst;
	};

	struct direct_map {
		struct arp_table arp;
		int ifindex;
		__be64 mac;
	} direct_entry;

	if (nh->nlmsg_type == RTM_DELROUTE)
		printf("DELETING Route entry\n");
	else if (nh->nlmsg_type == RTM_GETROUTE)
		printf("READING Route entry\n");
	else if (nh->nlmsg_type == RTM_NEWROUTE)
		printf("NEW Route entry\n");
	else
		printf("%d\n", nh->nlmsg_type);

	memset(&route, 0, sizeof(route));
	printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
	for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
		rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
		rtm_family = rt_msg->rtm_family;
		if (rtm_family == AF_INET)
			if (rt_msg->rtm_table != RT_TABLE_MAIN)
				continue;
		rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
		rtl = RTM_PAYLOAD(nh);

		for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
			switch (rt_attr->rta_type) {
			case NDA_DST:
				sprintf(dsts, "%u",
					(*((__be32 *)RTA_DATA(rt_attr))));
				break;
			case RTA_GATEWAY:
				sprintf(gws, "%u",
					*((__be32 *)RTA_DATA(rt_attr)));
				break;
			case RTA_OIF:
				sprintf(ifs, "%u",
					*((int *)RTA_DATA(rt_attr)));
				break;
			case RTA_METRICS:
				sprintf(metrics, "%u",
					*((int *)RTA_DATA(rt_attr)));
			default:
				break;
			}
		}
		sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
		route.dst = atoi(dsts);
		route.dst_len = atoi(dsts_len);
		route.gw = atoi(gws);
		route.iface = atoi(ifs);
		route.metric = atoi(metrics);
		route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
		route.iface_name = if_indextoname(route.iface, route.iface_name);
		route.mac = getmac(route.iface_name);
		if (route.mac == -1) {
			int i = 0;

			for (i = 0; i < total_ifindex; i++)
				bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
			exit(0);
		}
		assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
		if (rtm_family == AF_INET) {
			struct trie_value {
				__u8 prefix[4];
				__be64 value;
				int ifindex;
				int metric;
				__be32 gw;
			} *prefix_value;

			prefix_key = alloca(sizeof(*prefix_key) + 3);
			prefix_value = alloca(sizeof(*prefix_value));

			prefix_key->prefixlen = 32;
			prefix_key->prefixlen = route.dst_len;
			direct_entry.mac = route.mac & 0xffffffffffff;
			direct_entry.ifindex = route.iface;
			direct_entry.arp.mac = 0;
			direct_entry.arp.dst = 0;
			if (route.dst_len == 32) {
				if (nh->nlmsg_type == RTM_DELROUTE) {
					assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
				} else {
					if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
						direct_entry.arp.dst = route.dst;
					assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
				}
			}
			for (i = 0; i < 4; i++)
				prefix_key->data[i] = (route.dst >> i * 8) & 0xff;

			printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
			       (int)prefix_key->data[0],
			       (int)prefix_key->data[1],
			       (int)prefix_key->data[2],
			       (int)prefix_key->data[3],
			       route.gw, route.dst_len,
			       route.metric,
			       route.iface_name);
			if (bpf_map_lookup_elem(map_fd[0], prefix_key,
						prefix_value) < 0) {
				for (i = 0; i < 4; i++)
					prefix_value->prefix[i] = prefix_key->data[i];
				prefix_value->value = route.mac & 0xffffffffffff;
				prefix_value->ifindex = route.iface;
				prefix_value->gw = route.gw;
				prefix_value->metric = route.metric;

				assert(bpf_map_update_elem(map_fd[0],
							   prefix_key,
							   prefix_value, 0
							   ) == 0);
			} else {
				if (nh->nlmsg_type == RTM_DELROUTE) {
					printf("deleting entry\n");
					printf("prefix key=%d.%d.%d.%d/%d",
					       prefix_key->data[0],
					       prefix_key->data[1],
					       prefix_key->data[2],
					       prefix_key->data[3],
					       prefix_key->prefixlen);
					assert(bpf_map_delete_elem(map_fd[0],
								   prefix_key
								   ) == 0);
					/* Rereading the route table to check if
					 * there is an entry with the same
					 * prefix but a different metric as the
					 * deleted enty.
					 */
					get_route_table(AF_INET);
				} else if (prefix_key->data[0] ==
					   prefix_value->prefix[0] &&
					   prefix_key->data[1] ==
					   prefix_value->prefix[1] &&
					   prefix_key->data[2] ==
					   prefix_value->prefix[2] &&
					   prefix_key->data[3] ==
					   prefix_value->prefix[3] &&
					   route.metric >= prefix_value->metric) {
					continue;
				} else {
					for (i = 0; i < 4; i++)
						prefix_value->prefix[i] =
							prefix_key->data[i];
					prefix_value->value =
						route.mac & 0xffffffffffff;
					prefix_value->ifindex = route.iface;
					prefix_value->gw = route.gw;
					prefix_value->metric = route.metric;
					assert(bpf_map_update_elem(
								   map_fd[0],
								   prefix_key,
								   prefix_value,
								   0) == 0);
				}
			}
		}
		memset(&route, 0, sizeof(route));
		memset(dsts, 0, sizeof(dsts));
		memset(dsts_len, 0, sizeof(dsts_len));
		memset(gws, 0, sizeof(gws));
		memset(ifs, 0, sizeof(ifs));
		memset(&route, 0, sizeof(route));
	}
}

/* Function to read the existing route table  when the process is launched*/
static int get_route_table(int rtm_family)
{
	struct sockaddr_nl sa;
	struct nlmsghdr *nh;
	int sock, seq = 0;
	struct msghdr msg;
	struct iovec iov;
	int ret = 0;
	int nll;

	struct {
		struct nlmsghdr nl;
		struct rtmsg rt;
		char buf[8192];
	} req;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}
	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;
	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	memset(&req, 0, sizeof(req));
	req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.nl.nlmsg_type = RTM_GETROUTE;

	req.rt.rtm_family = rtm_family;
	req.rt.rtm_table = RT_TABLE_MAIN;
	req.nl.nlmsg_pid = 0;
	req.nl.nlmsg_seq = ++seq;
	memset(&msg, 0, sizeof(msg));
	iov.iov_base = (void *)&req.nl;
	iov.iov_len = req.nl.nlmsg_len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	ret = sendmsg(sock, &msg, 0);
	if (ret < 0) {
		printf("send to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	memset(buf, 0, sizeof(buf));
	nll = recv_msg(sa, sock);
	if (nll < 0) {
		printf("recv from netlink: %s\n", strerror(nll));
		ret = -1;
		goto cleanup;
	}
	nh = (struct nlmsghdr *)buf;
	read_route(nh, nll);
cleanup:
	close(sock);
	return ret;
}

/* Function to parse the arp entry returned by netlink
 * Updates the arp entry related map entries
 */
static void read_arp(struct nlmsghdr *nh, int nll)
{
	struct rtattr *rt_attr;
	char dsts[24], mac[24];
	struct ndmsg *rt_msg;
	int rtl, ndm_family;

	struct arp_table {
		__be64 mac;
		__be32 dst;
	} arp_entry;
	struct direct_map {
		struct arp_table arp;
		int ifindex;
		__be64 mac;
	} direct_entry;

	if (nh->nlmsg_type == RTM_GETNEIGH)
		printf("READING arp entry\n");
	printf("Address\tHwAddress\n");
	for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
		rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
		rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
		ndm_family = rt_msg->ndm_family;
		rtl = RTM_PAYLOAD(nh);
		for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
			switch (rt_attr->rta_type) {
			case NDA_DST:
				sprintf(dsts, "%u",
					*((__be32 *)RTA_DATA(rt_attr)));
				break;
			case NDA_LLADDR:
				sprintf(mac, "%lld",
					*((__be64 *)RTA_DATA(rt_attr)));
				break;
			default:
				break;
			}
		}
		arp_entry.dst = atoi(dsts);
		arp_entry.mac = atol(mac);
		printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
		if (ndm_family == AF_INET) {
			if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
						&direct_entry) == 0) {
				if (nh->nlmsg_type == RTM_DELNEIGH) {
					direct_entry.arp.dst = 0;
					direct_entry.arp.mac = 0;
				} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
					direct_entry.arp.dst = arp_entry.dst;
					direct_entry.arp.mac = arp_entry.mac;
				}
				assert(bpf_map_update_elem(map_fd[3],
							   &arp_entry.dst,
							   &direct_entry, 0
							   ) == 0);
				memset(&direct_entry, 0, sizeof(direct_entry));
			}
			if (nh->nlmsg_type == RTM_DELNEIGH) {
				assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
			} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
				assert(bpf_map_update_elem(map_fd[2],
							   &arp_entry.dst,
							   &arp_entry.mac, 0
							   ) == 0);
			}
		}
		memset(&arp_entry, 0, sizeof(arp_entry));
		memset(dsts, 0, sizeof(dsts));
	}
}

/* Function to read the existing arp table  when the process is launched*/
static int get_arp_table(int rtm_family)
{
	struct sockaddr_nl sa;
	struct nlmsghdr *nh;
	int sock, seq = 0;
	struct msghdr msg;
	struct iovec iov;
	int ret = 0;
	int nll;
	struct {
		struct nlmsghdr nl;
		struct ndmsg rt;
		char buf[8192];
	} req;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}
	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;
	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	memset(&req, 0, sizeof(req));
	req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.nl.nlmsg_type = RTM_GETNEIGH;
	req.rt.ndm_state = NUD_REACHABLE;
	req.rt.ndm_family = rtm_family;
	req.nl.nlmsg_pid = 0;
	req.nl.nlmsg_seq = ++seq;
	memset(&msg, 0, sizeof(msg));
	iov.iov_base = (void *)&req.nl;
	iov.iov_len = req.nl.nlmsg_len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	ret = sendmsg(sock, &msg, 0);
	if (ret < 0) {
		printf("send to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	memset(buf, 0, sizeof(buf));
	nll = recv_msg(sa, sock);
	if (nll < 0) {
		printf("recv from netlink: %s\n", strerror(nll));
		ret = -1;
		goto cleanup;
	}
	nh = (struct nlmsghdr *)buf;
	read_arp(nh, nll);
cleanup:
	close(sock);
	return ret;
}

/* Function to keep track and update changes in route and arp table
 * Give regular statistics of packets forwarded
 */
static int monitor_route(void)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	const unsigned int nr_keys = 256;
	struct pollfd fds_route, fds_arp;
	__u64 prev[nr_keys][nr_cpus];
	struct sockaddr_nl la, lr;
	__u64 values[nr_cpus];
	struct nlmsghdr *nh;
	int nll, ret = 0;
	int interval = 5;
	__u32 key;
	int i;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}

	fcntl(sock, F_SETFL, O_NONBLOCK);
	memset(&lr, 0, sizeof(lr));
	lr.nl_family = AF_NETLINK;
	lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
	if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	fds_route.fd = sock;
	fds_route.events = POLL_IN;

	sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock_arp < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}

	fcntl(sock_arp, F_SETFL, O_NONBLOCK);
	memset(&la, 0, sizeof(la));
	la.nl_family = AF_NETLINK;
	la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
	if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		ret = -1;
		goto cleanup;
	}
	fds_arp.fd = sock_arp;
	fds_arp.events = POLL_IN;

	memset(prev, 0, sizeof(prev));
	do {
		signal(SIGINT, close_and_exit);
		signal(SIGTERM, close_and_exit);

		sleep(interval);
		for (key = 0; key < nr_keys; key++) {
			__u64 sum = 0;

			assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
			for (i = 0; i < nr_cpus; i++)
				sum += (values[i] - prev[key][i]);
			if (sum)
				printf("proto %u: %10llu pkt/s\n",
				       key, sum / interval);
			memcpy(prev[key], values, sizeof(values));
		}

		memset(buf, 0, sizeof(buf));
		if (poll(&fds_route, 1, 3) == POLL_IN) {
			nll = recv_msg(lr, sock);
			if (nll < 0) {
				printf("recv from netlink: %s\n", strerror(nll));
				ret = -1;
				goto cleanup;
			}

			nh = (struct nlmsghdr *)buf;
			printf("Routing table updated.\n");
			read_route(nh, nll);
		}
		memset(buf, 0, sizeof(buf));
		if (poll(&fds_arp, 1, 3) == POLL_IN) {
			nll = recv_msg(la, sock_arp);
			if (nll < 0) {
				printf("recv from netlink: %s\n", strerror(nll));
				ret = -1;
				goto cleanup;
			}

			nh = (struct nlmsghdr *)buf;
			read_arp(nh, nll);
		}

	} while (1);
cleanup:
	close(sock);
	return ret;
}

int main(int ac, char **argv)
{
	char filename[256];
	char **ifname_list;
	int i = 1;

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	if (ac < 2) {
		printf("usage: %s [-S] Interface name list\n", argv[0]);
		return 1;
	}
	if (!strcmp(argv[1], "-S")) {
		flags = XDP_FLAGS_SKB_MODE;
		total_ifindex = ac - 2;
		ifname_list = (argv + 2);
	} else {
		flags = 0;
		total_ifindex = ac - 1;
		ifname_list = (argv + 1);
	}
	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 1;
	}
	printf("\n**************loading bpf file*********************\n\n\n");
	if (!prog_fd[0]) {
		printf("load_bpf_file: %s\n", strerror(errno));
		return 1;
	}
	ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
	for (i = 0; i < total_ifindex; i++) {
		ifindex_list[i] = if_nametoindex(ifname_list[i]);
		if (!ifindex_list[i]) {
			printf("Couldn't translate interface name: %s",
			       strerror(errno));
			return 1;
		}
	}
	for (i = 0; i < total_ifindex; i++) {
		if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
			printf("link set xdp fd failed\n");
			int recovery_index = i;

			for (i = 0; i < recovery_index; i++)
				bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);

			return 1;
		}
		printf("Attached to %d\n", ifindex_list[i]);
	}
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	printf("*******************ROUTE TABLE*************************\n\n\n");
	get_route_table(AF_INET);
	printf("*******************ARP TABLE***************************\n\n\n");
	get_arp_table(AF_INET);
	if (monitor_route() < 0) {
		printf("Error in receiving route update");
		return 1;
	}

	return 0;
}