cregit-Linux how code gets into the kernel

Release 4.15 kernel/bpf/core.c

Directory: kernel/bpf
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *      Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *      Jay Schulist <jschlst@samba.org>
 *      Alexei Starovoitov <ast@plumgrid.com>
 *      Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/frame.h>
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>

#include <asm/unaligned.h>

/* Registers
 *
 * Shorthand for fixed slots of an array named `regs`, indexed by the
 * BPF_REG_0 .. BPF_REG_10 constants.  The macros expand to plain array
 * element expressions, so they are only usable where a suitable `regs`
 * is in scope.
 */

#define BPF_R0	regs[BPF_REG_0]

#define BPF_R1	regs[BPF_REG_1]

#define BPF_R2	regs[BPF_REG_2]

#define BPF_R3	regs[BPF_REG_3]

#define BPF_R4	regs[BPF_REG_4]

#define BPF_R5	regs[BPF_REG_5]

#define BPF_R6	regs[BPF_REG_6]

#define BPF_R7	regs[BPF_REG_7]

#define BPF_R8	regs[BPF_REG_8]

#define BPF_R9	regs[BPF_REG_9]

#define BPF_R10	regs[BPF_REG_10]

/* Named registers
 *
 * DST and SRC select the `regs` slot named by the dst_reg / src_reg
 * field of the instruction pointed to by a local `insn`; IMM is that
 * instruction's imm field.  FP, ARG1 and CTX are the slots selected by
 * BPF_REG_FP, BPF_REG_ARG1 and BPF_REG_CTX respectively.
 */

#define DST	regs[insn->dst_reg]

#define SRC	regs[insn->src_reg]

#define FP	regs[BPF_REG_FP]

#define ARG1	regs[BPF_REG_ARG1]

#define CTX	regs[BPF_REG_CTX]

#define IMM	insn->imm

/* Translate a special offset @k into a pointer inside @skb's buffer.
 *
 * Offsets at or above SKF_NET_OFF are taken relative to the network
 * header, offsets at or above SKF_LL_OFF (but below SKF_NET_OFF) are
 * taken relative to the mac header; anything lower has no base.
 *
 * Returns the resolved pointer if the @size bytes starting there lie
 * between skb->head and the skb tail pointer, NULL otherwise.
 *
 * No hurry in this branch.
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k,
					   unsigned int size)
{
	u8 *ptr;

	if (k >= SKF_NET_OFF) {
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	} else if (k >= SKF_LL_OFF) {
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
	} else {
		ptr = NULL;
	}

	/* Reject anything that starts before the buffer or runs past
	 * the tail; a NULL ptr is rejected by the first test.
	 */
	if (ptr < skb->head || ptr + size > skb_tail_pointer(skb))
		return NULL;

	return ptr;
}

Contributors

PersonTokensPropCommitsCommitProp
Alexei Starovoitov83100.00%1100.00%
Total83100.00%1100.00%


/* Allocate a zeroed bpf_prog together with its auxiliary structure.
 *
 * @size is rounded up to a multiple of PAGE_SIZE and the resulting page
 * count is recorded in ->pages.  @gfp_extra_flags is OR-ed into
 * GFP_KERNEL for both allocations.
 *
 * Returns the new program, or NULL if either allocation fails (nothing
 * is leaked in that case).
 */
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	const gfp_t base_flags = GFP_KERNEL | gfp_extra_flags;
	struct bpf_prog_aux *prog_aux;
	struct bpf_prog *prog;

	size = round_up(size, PAGE_SIZE);

	prog = __vmalloc(size, base_flags | __GFP_ZERO, PAGE_KERNEL);
	if (!prog)
		return NULL;

	prog_aux = kzalloc(sizeof(*prog_aux), base_flags);
	if (!prog_aux)
		goto free_prog;

	/* Link program and aux to each other. */
	prog->pages = size / PAGE_SIZE;
	prog->aux = prog_aux;
	prog_aux->prog = prog;

	INIT_LIST_HEAD_RCU(&prog_aux->ksym_lnode);

	return prog;

free_prog:
	vfree(prog);
	return NULL;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann12093.75%375.00%
Alexei Starovoitov86.25%125.00%
Total128100.00%4100.00%

/* Export bpf_prog_alloc() (GPL-only) for use outside this file. */
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
/* Grow @fp_old so that it spans at least @size bytes (page-rounded).
 *
 * If the program already covers enough pages it is returned unchanged.
 * Otherwise the additional pages are charged to fp_old->aux->user, a new
 * zeroed area is allocated, the old contents are copied over and the old
 * area is released.  The aux structure is carried over to the new
 * program rather than freed.
 *
 * Returns the (possibly new) program, or NULL if charging or the
 * allocation fails; on failure @fp_old is left in place and any charge
 * taken is undone.
 */
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	const gfp_t alloc_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *fp_new;
	u32 new_pages, extra_pages;

	BUG_ON(fp_old == NULL);

	size = round_up(size, PAGE_SIZE);
	new_pages = size / PAGE_SIZE;
	if (new_pages <= fp_old->pages)
		return fp_old;

	extra_pages = new_pages - fp_old->pages;
	if (__bpf_prog_charge(fp_old->aux->user, extra_pages))
		return NULL;

	fp_new = __vmalloc(size, alloc_flags, PAGE_KERNEL);
	if (!fp_new) {
		__bpf_prog_uncharge(fp_old->aux->user, extra_pages);
		return NULL;
	}

	memcpy(fp_new, fp_old, fp_old->pages * PAGE_SIZE);
	fp_new->pages = new_pages;
	fp_new->aux->prog = fp_new;

	/* The aux pointer copied from fp_old now belongs to fp_new;
	 * detach it from fp_old so freeing the old program leaves it
	 * alone.
	 */
	fp_old->aux = NULL;
	__bpf_prog_free(fp_old);

	return fp_new;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann17598.87%375.00%
Alexei Starovoitov21.13%125.00%
Total177100.00%4100.00%


/* Release a program: first its aux structure, then the program area
 * itself.
 */
void __bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;

	kfree(aux);
	vfree(fp);
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann2195.45%150.00%
Alexei Starovoitov14.55%150.00%
Total22100.00%2100.00%


/* Compute the tag of @fp and store it in fp->tag.
 *
 * The instructions are copied into a scratch buffer with map fds zeroed,
 * the buffer is padded (0x80 marker byte followed by zeroes and a
 * big-endian 64-bit bit count) and run block by block through
 * sha_transform().  The leading sizeof(fp->tag) bytes of the big-endian
 * digest become the tag.
 *
 * Returns 0 on success or -ENOMEM if the scratch buffer cannot be
 * allocated.
 */
int bpf_prog_calc_tag(struct bpf_prog *fp)
{
	/* Offset of the trailing 64-bit length field within one block. */
	const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
	u32 raw_size = bpf_prog_tag_scratch_size(fp);
	u32 digest[SHA_DIGEST_WORDS];
	u32 ws[SHA_WORKSPACE_WORDS];
	u32 i, bsize, psize, blocks;
	struct bpf_insn *dst;
	bool was_ld_map;
	u8 *raw, *todo;
	__be32 *result;
	__be64 *bits;

	raw = vmalloc(raw_size);
	if (!raw)
		return -ENOMEM;

	sha_init(digest);
	memset(ws, 0, sizeof(ws));

	/* We need to take out the map fd for the digest calculation
	 * since they are unstable from user space side.
	 */
	dst = (void *)raw;
	for (i = 0, was_ld_map = false; i < fp->len; i++) {
		dst[i] = fp->insnsi[i];
		if (!was_ld_map &&
		    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
			/* First slot of a map-fd load: drop the fd. */
			was_ld_map = true;
			dst[i].imm = 0;
		} else if (was_ld_map &&
			   dst[i].code == 0 &&
			   dst[i].dst_reg == 0 &&
			   dst[i].src_reg == 0 &&
			   dst[i].off == 0) {
			/* Slot right after a map-fd load whose other
			 * fields are all zero: clear its imm as well.
			 */
			was_ld_map = false;
			dst[i].imm = 0;
		} else {
			was_ld_map = false;
		}
	}

	/* Zero everything past the instructions, then append the 0x80
	 * marker byte; psize now counts instructions plus the marker.
	 */
	psize = bpf_prog_insn_size(fp);
	memset(&raw[psize], 0, raw_size - psize);
	raw[psize++] = 0x80;

	bsize  = round_up(psize, SHA_MESSAGE_BYTES);
	blocks = bsize / SHA_MESSAGE_BYTES;
	todo   = raw;
	if (bsize - psize >= sizeof(__be64)) {
		/* Length field fits in the tail of the last block. */
		bits = (__be64 *)(todo + bsize - sizeof(__be64));
	} else {
		/* Not enough room: put it at the end of one extra block. */
		bits = (__be64 *)(todo + bsize + bits_offset);
		blocks++;
	}
	/* psize - 1 excludes the marker byte; << 3 turns bytes into bits. */
	*bits = cpu_to_be64((psize - 1) << 3);

	while (blocks--) {
		sha_transform(digest, todo, ws);
		todo += SHA_MESSAGE_BYTES;
	}

	/* Convert the digest words to big-endian in place. */
	result = (__force __be32 *)digest;
	for (i = 0; i < SHA_DIGEST_WORDS; i++)
		result[i] = cpu_to_be32(digest[i]);
	memcpy(fp->tag, result, sizeof(fp->tag));

	vfree(raw);
	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann447100.00%3100.00%
Total447100.00%3100.00%


static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_JMP && /* Call and Exit are both special jumps with no * target inside the BPF instruction image. */ BPF_OP(insn->code) != BPF_CALL && BPF_OP(insn->code) != BPF_EXIT; }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann41100.00%1100.00%
Total41100.00%1100.00%


/* Fix up jump offsets in @prog for a patch at index @pos that is
 * @delta instructions longer than the single instruction it replaced.
 *
 * Every jump with an in-image target is inspected:
 *  - a jump located before @pos whose target lies beyond @pos has its
 *    offset increased by @delta;
 *  - a jump located after @pos + @delta whose target lies at or before
 *    @pos + @delta has its offset decreased by @delta.
 */
static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
{
	const u32 insn_cnt = prog->len;
	const u32 pos_end = pos + delta;
	u32 idx;

	for (idx = 0; idx < insn_cnt; idx++) {
		struct bpf_insn *insn = &prog->insnsi[idx];
		u32 target;

		if (!bpf_is_jmp_and_has_target(insn))
			continue;

		/* Adjust offset of jmps if we cross boundaries. */
		target = idx + insn->off + 1;
		if (idx < pos && target > pos)
			insn->off += delta;
		else if (idx > pos_end && target <= pos_end)
			insn->off -= delta;
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann112100.00%1100.00%
Total112100.00%1100.00%


/* Replace the instruction at index @off of @prog with the @len
 * instructions in @patch.
 *
 * With @len == 1 the instruction is overwritten in place and @prog is
 * returned.  Otherwise the program is grown via bpf_prog_realloc(), the
 * tail is shifted, the patch is copied in and branch offsets are
 * adjusted.
 *
 * Returns the (possibly reallocated) program, or NULL if growing the
 * program failed.
 */
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len)
{
	const u32 grow_by = len - 1;
	struct bpf_prog *patched;
	struct bpf_insn *slot;
	u32 new_cnt, tail_cnt;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (grow_by == 0) {
		memcpy(prog->insnsi + off, patch, sizeof(*patch));
		return prog;
	}

	new_cnt = prog->len + grow_by;

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
	 * last page could have large enough tailroom.
	 */
	patched = bpf_prog_realloc(prog, bpf_prog_size(new_cnt), GFP_USER);
	if (!patched)
		return NULL;

	patched->len = new_cnt;

	/* Patching happens in 3 steps:
	 *
	 * 1) Move over tail of insnsi from next instruction onwards,
	 *    so we can patch the single target insn with one or more
	 *    new ones (patching is always from 1 to n insns, n > 0).
	 * 2) Inject new instructions at the target location.
	 * 3) Adjust branch offsets if necessary.
	 */
	tail_cnt = new_cnt - off - len;
	slot = patched->insnsi + off;

	memmove(slot + len, slot + 1, sizeof(*patch) * tail_cnt);
	memcpy(slot, patch, sizeof(*patch) * len);

	bpf_adj_branches(patched, off, grow_by);

	return patched;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann173100.00%1100.00%
Total173100.00%1100.00%

#ifdef CONFIG_BPF_JIT
/* Report the address range occupied by @prog's JIT binary.
 *
 * *@symbol_start receives the address of the binary header and
 * *@symbol_end the address hdr->pages pages beyond it.  Warns once if
 * @prog is not an eBPF JITed program.
 */
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
			 unsigned long *symbol_start,
			 unsigned long *symbol_end)
{
	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
	const unsigned long base = (unsigned long)hdr;

	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));

	*symbol_start = base;
	*symbol_end = base + hdr->pages * PAGE_SIZE;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann69100.00%2100.00%
Total69100.00%2100.00%


static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; BUILD_BUG_ON(sizeof("bpf_prog_") + sizeof(prog->tag) * 2 + /* name has been null terminated. * We should need +1 for the '_' preceding * the name. However, the null character * is double counted between the name and the * sizeof("bpf_prog_") above, so we omit * the +1 here. */ sizeof(prog->aux->name) > KSYM_NAME_LEN); sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); if (prog->aux->name[0]) snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); else *sym = 0; }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann7057.38%266.67%
Martin KaFai Lau5242.62%133.33%
Total122100.00%3100.00%


/* Return the start address of the program that owns latch tree node @n
 * (the node is the ksym_tnode member of a bpf_prog_aux).
 */
static __always_inline unsigned long
bpf_get_prog_addr_start(struct latch_tree_node *n)
{
	const struct bpf_prog_aux *aux =
		container_of(n, struct bpf_prog_aux, ksym_tnode);
	unsigned long start, end;

	bpf_get_prog_addr_region(aux->prog, &start, &end);

	return start;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann53100.00%2100.00%
Total53100.00%2100.00%


/* Ordering callback for the latch tree: node @a sorts before node @b
 * when its program starts at a lower address.
 */
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
					  struct latch_tree_node *b)
{
	const unsigned long start_a = bpf_get_prog_addr_start(a);
	const unsigned long start_b = bpf_get_prog_addr_start(b);

	return start_a < start_b;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann28100.00%2100.00%
Total28100.00%2100.00%


/* Lookup callback for the latch tree.  @key is an address cast to a
 * pointer; it is compared against the address range of the program that
 * owns node @n.
 *
 * Returns 0 if the address falls inside [start, end), -1 if it lies
 * below the range and 1 if it lies at or above its end.
 */
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
{
	const unsigned long addr = (unsigned long)key;
	const struct bpf_prog_aux *aux =
		container_of(n, struct bpf_prog_aux, ksym_tnode);
	unsigned long start, end;

	bpf_get_prog_addr_region(aux->prog, &start, &end);

	if (addr >= start && addr < end)
		return 0;

	return addr < start ? -1 : 1;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann85100.00%1100.00%
Total85100.00%1100.00%

/* Callbacks handed to the latch tree helpers for bpf_tree: ordering by
 * program start address and lookup of an address within a program's
 * range.
 */
static const struct latch_tree_ops bpf_tree_ops = {
	.less	= bpf_tree_less,
	.comp	= bpf_tree_comp,
};

/* Taken (with bottom halves disabled) by bpf_prog_kallsyms_add() and
 * bpf_prog_kallsyms_del() around updates of the list and tree below.
 */
static DEFINE_SPINLOCK(bpf_lock);
/* List of programs exposed as symbols, linked via aux->ksym_lnode. */
static LIST_HEAD(bpf_kallsyms);
/* Address-ordered tree of the same programs, linked via
 * aux->ksym_tnode.
 */
static struct latch_tree_root bpf_tree __cacheline_aligned;

/* NOTE(review): not read directly in the code visible here; presumably
 * this is the switch behind bpf_jit_kallsyms_enabled() - confirm.
 */
int bpf_jit_kallsyms __read_mostly;
/* Link @aux into both the bpf_kallsyms list and the bpf_tree latch
 * tree.  Warns once if the list node is not empty beforehand.
 */
static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
{
	struct list_head *lnode = &aux->ksym_lnode;

	WARN_ON_ONCE(!list_empty(lnode));

	list_add_tail_rcu(lnode, &bpf_kallsyms);
	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann48100.00%1100.00%
Total48100.00%1100.00%


/* Unlink @aux from the bpf_tree latch tree and the bpf_kallsyms list.
 * Does nothing if the list node is empty.
 */
static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
{
	struct list_head *lnode = &aux->ksym_lnode;

	if (!list_empty(lnode)) {
		latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
		list_del_rcu(lnode);
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann44100.00%1100.00%
Total44100.00%1100.00%


static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) { return fp->jited && !bpf_prog_was_classic(fp); }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann23100.00%1100.00%
Total23100.00%1100.00%


static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym_lnode) || fp->aux->ksym_lnode.prev == LIST_POISON2; }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann33100.00%1100.00%
Total33100.00%1100.00%


/* Expose @fp as a symbol.  Silently does nothing unless @fp is a
 * kallsyms candidate and the caller has CAP_SYS_ADMIN.
 */
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
	if (!bpf_prog_kallsyms_candidate(fp))
		return;
	if (!capable(CAP_SYS_ADMIN))
		return;

	spin_lock_bh(&bpf_lock);
	bpf_prog_ksym_node_add(fp->aux);
	spin_unlock_bh(&bpf_lock);
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann4295.45%150.00%
Hannes Frederic Sowa24.55%150.00%
Total44100.00%2100.00%


/* Withdraw @fp from the exposed symbols.  Programs that are not
 * kallsyms candidates are ignored.
 */
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
	if (bpf_prog_kallsyms_candidate(fp)) {
		spin_lock_bh(&bpf_lock);
		bpf_prog_ksym_node_del(fp->aux);
		spin_unlock_bh(&bpf_lock);
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann3694.74%150.00%
Hannes Frederic Sowa25.26%150.00%
Total38100.00%2100.00%


/* Look up the program whose address range contains @addr.
 *
 * Returns NULL when bpf_jit_kallsyms_enabled() is false or when no
 * program in bpf_tree covers the address.
 */
static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
{
	struct latch_tree_node *node;

	if (!bpf_jit_kallsyms_enabled())
		return NULL;

	node = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
	if (!node)
		return NULL;

	return container_of(node, struct bpf_prog_aux, ksym_tnode)->prog;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann60100.00%1100.00%
Total60100.00%1100.00%


/* Resolve @addr to the name of the BPF program containing it.
 *
 * On a hit the name is written to @sym, which is also the return value.
 * If @size is non-NULL it receives the length of the program's address
 * range, and if @off is non-NULL the offset of @addr from the range
 * start.  Returns NULL (leaving the outputs untouched) when no program
 * matches.  The lookup runs under rcu_read_lock().
 */
const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
				 unsigned long *off, char *sym)
{
	unsigned long start, end;
	struct bpf_prog *prog;
	char *name = NULL;

	rcu_read_lock();

	prog = bpf_prog_kallsyms_find(addr);
	if (!prog)
		goto unlock;

	bpf_get_prog_addr_region(prog, &start, &end);
	bpf_get_prog_name(prog, sym);
	name = sym;

	if (size)
		*size = end - start;
	if (off)
		*off = addr - start;

unlock:
	rcu_read_unlock();

	return name;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann108100.00%1100.00%
Total108100.00%1100.00%


bool is_bpf_text_address(unsigned long addr) { bool ret; rcu_read_lock(); ret = bpf_prog_kallsyms_find(addr) != NULL; rcu_read_unlock(); return ret; }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann30100.00%1100.00%
Total30100.00%1100.00%


/* Fetch the @symnum-th entry (counting from 0) of the bpf_kallsyms
 * list.
 *
 * On success *@value is set to the program's start address, *@type to
 * BPF_SYM_ELF_TYPE, the name is written to @sym and 0 is returned.
 * Returns -ERANGE if bpf_jit_kallsyms_enabled() is false or the list
 * has no such entry.  The list is walked under rcu_read_lock().
 */
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
		    char *sym)
{
	unsigned long start, end;
	struct bpf_prog_aux *aux;
	unsigned int idx = 0;
	int err = -ERANGE;

	if (!bpf_jit_kallsyms_enabled())
		return err;

	rcu_read_lock();
	list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
		if (idx++ == symnum) {
			bpf_get_prog_addr_region(aux->prog, &start, &end);
			bpf_get_prog_name(aux->prog, sym);

			*value = start;
			*type = BPF_SYM_ELF_TYPE;

			err = 0;
			break;
		}
	}
	rcu_read_unlock();

	return err;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann118100.00%1100.00%
Total118100.00%1100.00%


/* Allocate an image area for a JITed program of @proglen bytes.
 *
 * The area is sized to hold the header, the program and at least 128
 * spare bytes, rounded up to whole pages, and is pre-filled through
 * @bpf_fill_ill_insns.  The program start is placed at a random offset
 * inside the spare room, masked with ~(@alignment - 1), and returned
 * through *@image_ptr.
 *
 * Returns the header of the new area, or NULL if module_alloc() fails.
 */
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
		     unsigned int alignment,
		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	const size_t payload = proglen + sizeof(struct bpf_binary_header);
	struct bpf_binary_header *hdr;
	unsigned int size, slack, offset;

	/* Most of BPF filters are really small, but if some of them
	 * fill a page, allow at least 128 extra bytes to insert a
	 * random section of illegal instructions.
	 */
	size = round_up(payload + 128, PAGE_SIZE);

	hdr = module_alloc(size);
	if (!hdr)
		return NULL;

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(hdr, size);

	hdr->pages = size / PAGE_SIZE;

	slack = min_t(unsigned int, size - payload,
		      PAGE_SIZE - sizeof(*hdr));
	offset = (get_random_int() % slack) & ~(alignment - 1);

	/* Leave a random number of instructions before BPF code. */
	*image_ptr = &hdr->image[offset];

	return hdr;
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann128100.00%1100.00%
Total128100.00%1100.00%


/* Release an image area by handing its header @hdr to module_memfree(). */
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	module_memfree(hdr);
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann15100.00%1100.00%
Total15100.00%1100.00%

/* Default teardown of a program and, if present, its JIT image.
 *
 * For a JITed program the binary is unlocked from read-only and freed,
 * and a one-time warning fires if the program still appears to be on
 * the kallsyms list.  The program itself is always released through
 * bpf_prog_unlock_free().
 *
 * This symbol is only overridden by archs that have different
 * requirements than the usual eBPF JITs, f.e. when they only
 * implement cBPF JIT, do not set images read-only, etc.
 */
void __weak bpf_jit_free(struct bpf_prog *fp)
{
	struct bpf_binary_header *hdr;

	if (!fp->jited)
		goto free_prog;

	hdr = bpf_jit_binary_hdr(fp);

	bpf_jit_binary_unlock_ro(hdr);
	bpf_jit_binary_free(hdr);

	WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));

free_prog:
	bpf_prog_unlock_free(fp);
}

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann53100.00%2100.00%
Total53100.00%2100.00%

/* NOTE(review): not read in the code visible here; presumably the JIT
 * hardening setting consulted by the constant blinding code - confirm.
 */
int bpf_jit_harden __read_mostly;
static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, struct bpf_insn *to_buff) { struct bpf_insn *to = to_buff; u32 imm_rnd = get_random_int(); s16 off; BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); if (from->imm == 0 && (from->code == (BPF_ALU | BPF_MOV | BPF_K) || from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); goto out; } switch (from->code) { case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU | BPF_MOD | BPF_K: *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); break; case BPF_ALU64 | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX); break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP |<