
Release 4.15: arch/x86/include/asm/tlbflush.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm.h>
#include <linux/sched.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>

/*
 * The x86 feature is called PCID (Process Context IDentifier). It is similar
 * to what is traditionally called ASID on the RISC processors.
 *
 * We don't use the traditional ASID implementation, where each process/mm gets
 * its own ASID and flush/restart when we run out of ASID space.
 *
 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
 * that came by on this CPU, allowing cheaper switch_mm between processes on
 * this CPU.
 *
 * We end up with different spaces for different things. To avoid confusion we
 * use different names for each of them:
 *
 * ASID  - [0, TLB_NR_DYN_ASIDS-1]
 *         the canonical identifier for an mm
 *
 * kPCID - [1, TLB_NR_DYN_ASIDS]
 *         the value we write into the PCID part of CR3; corresponds to the
 *         ASID+1, because PCID 0 is special.
 *
 * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
 *         for KPTI each mm has two address spaces and thus needs two
 *         PCID values, but we can still do with a single ASID denomination
 *         for each mm. Corresponds to kPCID + 2048.
 *
 */
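
To make the three spaces concrete, here is a small standalone sketch (illustrative only, not part of this header) that computes the kPCID and uPCID for each dynamic ASID. It assumes the PTI user/kernel switch bit is CR3 bit 11 (X86_CR3_PTI_PCID_USER_BIT), which is why the user copies sit at an offset of 2048.

/*
 * Illustrative sketch, not kernel code: maps an ASID to its kPCID and
 * uPCID as described above.  Assumes TLB_NR_DYN_ASIDS == 6 and that the
 * PTI user/kernel switch bit is CR3 bit 11 (offset 2048).
 */
#include <stdio.h>

#define EXAMPLE_TLB_NR_DYN_ASIDS	6
#define EXAMPLE_PTI_PCID_USER_BIT	11

int main(void)
{
	for (unsigned int asid = 0; asid < EXAMPLE_TLB_NR_DYN_ASIDS; asid++) {
		unsigned int kpcid = asid + 1;		/* PCID 0 is special */
		unsigned int upcid = kpcid | (1u << EXAMPLE_PTI_PCID_USER_BIT);

		printf("ASID %u -> kPCID %u, uPCID %u\n", asid, kpcid, upcid);
	}
	return 0;	/* prints kPCID 1..6 and uPCID 2049..2054 */
}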

/* There are 12 bits of space for ASIDS in CR3 */

#define CR3_HW_ASID_BITS		12

/*
 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
 * user/kernel switches
 */
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS	1
#else
# define PTI_CONSUMED_PCID_BITS	0
#endif

#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)

/*
 * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid.  -1 below to account
 * for them being zero-based.  Another -1 is because PCID 0 is reserved for
 * use by non-PCID-aware users.
 */
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
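
As a worked example (not part of the header): with PTI enabled, one of the 12 hardware PCID bits is reserved for the user/kernel switch, leaving CR3_AVAIL_PCID_BITS = 12 - 1 = 11, so MAX_ASID_AVAILABLE = (1 << 11) - 2 = 2046; without PTI it would be (1 << 12) - 2 = 4094. The check below is illustrative only.

/* Illustrative only: verifies the arithmetic above for the PTI-enabled case. */
_Static_assert((1 << (12 - 1)) - 2 == 2046,
	       "11 usable PCID bits give 2048 values; PCID 0 is reserved and ASIDs are zero-based");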

/*
 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
 * lines.
 */

#define TLB_NR_DYN_ASIDS	6

/*
 * Given @asid, compute kPCID
 */
static inline u16 kern_pcid(u16 asid)
{
	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/*
	 * Make sure that the dynamic ASID space does not conflict with the
	 * bit we are using to switch between user and kernel ASIDs.
	 */
	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));

	/*
	 * The ASID being passed in here should have respected the
	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
	 */
	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
	/*
	 * The dynamically-assigned ASIDs that get passed in are small
	 * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
	 * so do not bother to clear it.
	 *
	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
	 * PCID bits.  This serves two purposes.  It prevents a nasty
	 * situation in which PCID-unaware code saves CR3, loads some other
	 * value (with PCID == 0), and then restores CR3, thus corrupting
	 * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
	 * that any bugs involving loading a PCID-enabled CR3 with
	 * CR4.PCIDE off will trigger deterministically.
	 */
	return asid + 1;
}

Contributors

Person             Tokens  Prop     Commits  Commit Prop
Peter Zijlstra     28      53.85%   1        25.00%
Dave Hansen        13      25.00%   1        25.00%
Andrew Lutomirski  9       17.31%   1        25.00%
Thomas Gleixner    2       3.85%    1        25.00%
Total              52      100.00%  4        100.00%

/*
 * Given @asid, compute uPCID
 */
static inline u16 user_pcid(u16 asid)
{
	u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
	return ret;
}

Contributors

Person             Tokens  Prop     Commits  Commit Prop
Dave Hansen        31      96.88%   1        50.00%
Thomas Gleixner    1       3.12%    1        50.00%
Total              32      100.00%  2        100.00%

struct pgd_t;
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
	if (static_cpu_has(X86_FEATURE_PCID)) {
		return __sme_pa(pgd) | kern_pcid(asid);
	} else {
		VM_WARN_ON_ONCE(asid != 0);
		return __sme_pa(pgd);
	}
}

Contributors

Person             Tokens  Prop     Commits  Commit Prop
Dave Hansen        33      64.71%   2        66.67%
Andrew Lutomirski  18      35.29%   1        33.33%
Total              51      100.00%  3        100.00%


static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
	VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}

Contributors

Person             Tokens  Prop     Commits  Commit Prop
Dave Hansen        31      70.45%   3        75.00%
Andrew Lutomirski  13      29.55%   1        25.00%
Total              44      100.00%  4        100.00%

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

static inline bool tlb_defer_switch_to_init_mm(void)
{
	/*
	 * If we have PCID, then switching to init_mm is reasonably
	 * fast.  If we don't have PCID, then switching to init_mm is
	 * quite slow, so we try to defer it in the hopes that we can
	 * avoid it entirely.  The latter approach runs the risk of
	 * receiving otherwise unnecessary IPIs.
	 *
	 * This choice is just a heuristic.  The tlb code can handle this
	 * function returning true or false regardless of whether we have
	 * PCID.
	 */
	return !static_cpu_has(X86_FEATURE_PCID);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski17100.00%2100.00%
Total17100.00%2100.00%

struct tlb_context {
	u64 ctx_id;
	u64 tlb_gen;
};

struct tlb_state {
	/*
	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
	 * are on.  This means that it may not match current->active_mm,
	 * which will contain the previous user mm when we're in lazy TLB
	 * mode even if we've already switched back to swapper_pg_dir.
	 */
	struct mm_struct *loaded_mm;
	u16 loaded_mm_asid;
	u16 next_asid;

	/*
	 * We can be in one of several states:
	 *
	 *  - Actively using an mm.  Our CPU's bit will be set in
	 *    mm_cpumask(loaded_mm) and is_lazy == false;
	 *
	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
	 *
	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
	 *    We're heuristically guessing that the CR3 load we
	 *    skipped more than makes up for the overhead added by
	 *    lazy mode.
	 */
	bool is_lazy;

	/*
	 * If set we changed the page tables in such a way that we
	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
	 * This tells us to go invalidate all the non-loaded ctxs[]
	 * on the next context switch.
	 *
	 * The current ctx was kept up-to-date as it ran and does not
	 * need to be invalidated.
	 */
	bool invalidate_other;

	/*
	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
	 * the corresponding user PCID needs a flush next time we
	 * switch to it; see SWITCH_TO_USER_CR3.
	 */
	unsigned short user_pcid_flush_mask;

	/*
	 * Access to this CR4 shadow and to H/W CR4 is protected by
	 * disabling interrupts when modifying either one.
	 */
	unsigned long cr4;

	/*
	 * This is a list of all contexts that might exist in the TLB.
	 * There is one per ASID that we use, and the ASID (what the
	 * CPU calls PCID) is the index into ctxts.
	 *
	 * For each context, ctx_id indicates which mm the TLB's user
	 * entries came from.  As an invariant, the TLB will never
	 * contain entries that are out-of-date as when that mm reached
	 * the tlb_gen in the list.
	 *
	 * To be clear, this means that it's legal for the TLB code to
	 * flush the TLB without updating tlb_gen.  This can happen
	 * (for now, at least) due to paravirt remote flushes.
	 *
	 * NB: context 0 is a bit special, since it's also used by
	 * various bits of init code.  This is fine -- code that
	 * isn't aware of PCID will end up harmlessly flushing
	 * context 0.
	 */
	struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski19100.00%2100.00%
Total19100.00%2100.00%


static inline void __cr4_set(unsigned long cr4)
{
	lockdep_assert_irqs_disabled();
	this_cpu_write(cpu_tlbstate.cr4, cr4);
	__write_cr4(cr4);
}

Contributors

PersonTokensPropCommitsCommitProp
Nadav Amit28100.00%2100.00%
Total28100.00%2100.00%

/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
	unsigned long cr4, flags;

	local_irq_save(flags);
	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	if ((cr4 | mask) != cr4)
		__cr4_set(cr4 | mask);
	local_irq_restore(flags);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski3871.70%250.00%
Nadav Amit1528.30%250.00%
Total53100.00%4100.00%

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
	unsigned long cr4, flags;

	local_irq_save(flags);
	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	if ((cr4 & ~mask) != cr4)
		__cr4_set(cr4 & ~mask);
	local_irq_restore(flags);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski4072.73%250.00%
Nadav Amit1527.27%250.00%
Total55100.00%4100.00%


static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
{
	unsigned long cr4;

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	__cr4_set(cr4 ^ mask);
}

Contributors

PersonTokensPropCommitsCommitProp
Thomas Gleixner2787.10%133.33%
Nadav Amit412.90%266.67%
Total31100.00%3100.00%

/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
	return this_cpu_read(cpu_tlbstate.cr4);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski18100.00%2100.00%
Total18100.00%2100.00%

/*
 * Mark all other ASIDs as invalid, preserves the current.
 */
static inline void invalidate_other_asid(void)
{
	this_cpu_write(cpu_tlbstate.invalidate_other, true);
}

Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen18100.00%1100.00%
Total18100.00%1100.00%

/*
 * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
 * enable and PPro Global page enable), so that any CPU's that boot
 * up after us can get the correct flags.  This should only be used
 * during boot on the boot cpu.
 */
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{
	mmu_cr4_features |= mask;
	if (trampoline_cr4_features)
		*trampoline_cr4_features = mmu_cr4_features;
	cr4_set_bits(mask);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski29100.00%1100.00%
Total29100.00%1100.00%

extern void initialize_tlbstate_and_flush(void);

/*
 * Given an ASID, flush the corresponding user ASID.  We can delay this
 * until the next time we switch to it.
 *
 * See SWITCH_TO_USER_CR3.
 */
static inline void invalidate_user_asid(u16 asid)
{
	/* There is no user ASID if address space separation is off */
	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
		return;

	/*
	 * We only have a single ASID if PCID is off and the CR3
	 * write will have flushed it.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_PCID))
		return;

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	__set_bit(kern_pcid(asid),
		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra60100.00%1100.00%
Total60100.00%1100.00%

/*
 * flush the entire current user mapping
 */
static inline void __native_flush_tlb(void)
{
	/*
	 * Preemption or interrupts must be disabled to protect the access
	 * to the per CPU variable and to prevent being preempted between
	 * read_cr3() and write_cr3().
	 */
	WARN_ON_ONCE(preemptible());

	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* If current->mm == NULL then the read_cr3() "borrows" an mm */
	native_write_cr3(__native_read_cr3());
}

Contributors

PersonTokensPropCommitsCommitProp
Thomas Gleixner2163.64%240.00%
Peter Zijlstra1030.30%120.00%
Andrew Lutomirski13.03%120.00%
Chris Wright13.03%120.00%
Total33100.00%5100.00%

/*
 * flush everything
 */
static inline void __native_flush_tlb_global(void)
{
	unsigned long cr4, flags;

	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		/*
		 * Using INVPCID is considerably faster than a pair of writes
		 * to CR4 sandwiched inside an IRQ flag save/restore.
		 *
		 * Note, this works with CR4.PCIDE=0 or 1.
		 */
		invpcid_flush_all();
		return;
	}

	/*
	 * Read-modify-write to CR4 - protect it from preemption and
	 * from interrupts. (Use the raw variant because this code can
	 * be called from deep inside debugging code.)
	 */
	raw_local_irq_save(flags);

	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	/* toggle PGE */
	native_write_cr4(cr4 ^ X86_CR4_PGE);
	/* write old PGE again and flush TLBs */
	native_write_cr4(cr4);

	raw_local_irq_restore(flags);
}

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra2438.10%116.67%
Fenghua Yu1930.16%116.67%
Andrew Lutomirski1320.63%116.67%
Ingo Molnar57.94%116.67%
Thomas Gleixner11.59%116.67%
Dave Hansen11.59%116.67%
Total63100.00%6100.00%

/*
 * flush one page in the user mapping
 */
static inline void __native_flush_tlb_single(unsigned long addr)
{
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);

	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
	 * Just use invalidate_user_asid() in case we are called early.
	 */
	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
		invalidate_user_asid(loaded_mm_asid);
	else
		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra2441.38%125.00%
Dave Hansen2034.48%125.00%
Thomas Gleixner1220.69%125.00%
Joe Perches23.45%125.00%
Total58100.00%4100.00%

/*
 * flush everything
 */
static inline void __flush_tlb_all(void)
{
	if (boot_cpu_has(X86_FEATURE_PGE)) {
		__flush_tlb_global();
	} else {
		/*
		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
		 */
		__flush_tlb();
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Thomas Gleixner1967.86%125.00%
Peter Zijlstra517.86%125.00%
Borislav Petkov310.71%125.00%
Daniel Borkmann13.57%125.00%
Total28100.00%4100.00%

/*
 * flush one page in the kernel mapping
 */
static inline void __flush_tlb_one(unsigned long addr)
{
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
	__flush_tlb_single(addr);

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
	 * but since kernel space is replicated across all, we must also
	 * invalidate all others.
	 */
	invalidate_other_asid();
}

Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen1750.00%250.00%
Thomas Gleixner1647.06%125.00%
Mel Gorman12.94%125.00%
Total34100.00%4100.00%

#define TLB_FLUSH_ALL	-1UL

/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
struct flush_tlb_info {
	/*
	 * We support several kinds of flushes.
	 *
	 * - Fully flush a single mm.  .mm will be set, .end will be
	 *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
	 *   which the IPI sender is trying to catch us up.
	 *
	 * - Partially flush a single mm.  .mm will be set, .start and
	 *   .end will indicate the range, and .new_tlb_gen will be set
	 *   such that the changes between generation .new_tlb_gen-1 and
	 *   .new_tlb_gen are entirely contained in the indicated range.
	 *
	 * - Fully flush all mms whose tlb_gens have been updated.  .mm
	 *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
	 *   will be zero.
	 */
	struct mm_struct	*mm;
	unsigned long		start;
	unsigned long		end;
	u64			new_tlb_gen;
};

#define local_flush_tlb() __flush_tlb()

#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)

#define flush_tlb_range(vma, start, end)	\
		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)

extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
{
	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
}
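
As a usage sketch of the API listed above (hypothetical caller, not part of this header): after modifying page-table entries for a user range, a caller would typically request a page-granular flush via flush_tlb_mm_range(); the example name and the VM_NONE flag choice are assumptions for illustration, mirroring what flush_tlb_page() does.

/*
 * Hypothetical caller, for illustration only: flush the user TLB entries
 * for a range of 'mm' whose page tables were just modified.
 */
static inline void example_flush_user_range(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end)
{
	flush_tlb_mm_range(mm, start, end, VM_NONE);
}
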

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski31100.00%1100.00%
Total31100.00%1100.00%

void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info);
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
	/*
	 * Bump the generation count.  This also serves as a full barrier
	 * that synchronizes with switch_mm(): callers are required to order
	 * their read of mm_cpumask after their writes to the paging
	 * structures.
	 */
	return atomic64_inc_return(&mm->context.tlb_gen);
}

Contributors

PersonTokensPropCommitsCommitProp
Peter Zijlstra24100.00%1100.00%
Total24100.00%1100.00%


static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
					struct mm_struct *mm)
{
	inc_mm_tlb_gen(mm);
	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski40100.00%2100.00%
Total40100.00%2100.00%
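
A minimal sketch of how the batch interface is meant to be used (hypothetical caller; the function name below is an assumption, real users live in the core mm reclaim path, e.g. try_to_unmap_flush()): record the CPUs of each mm whose mappings were unmapped, then issue one combined flush.

/*
 * Hypothetical caller, for illustration only: shows the intended
 * add-then-flush pattern for the unmap batching interface.
 */
static inline void example_batched_unmap_flush(struct arch_tlbflush_unmap_batch *batch,
					       struct mm_struct *mm)
{
	cpumask_clear(&batch->cpumask);		/* start with an empty batch */
	arch_tlbbatch_add_mm(batch, mm);	/* record mm's CPUs and bump its tlb_gen */
	arch_tlbbatch_flush(batch);		/* one combined flush for the whole batch */
}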

extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, info)	\
	native_flush_tlb_others(mask, info)
#endif

#endif /* _ASM_X86_TLBFLUSH_H */

Overall Contributors

Person              Tokens  Prop     Commits  Commit Prop
Andrew Lutomirski   410     35.71%   18       34.62%
Peter Zijlstra      217     18.90%   5        9.62%
Thomas Gleixner     196     17.07%   5        9.62%
Dave Hansen         180     15.68%   6        11.54%
Nadav Amit          62      5.40%    2        3.85%
Alex Shi            41      3.57%    4        7.69%
Fenghua Yu          19      1.66%    1        1.92%
Borislav Petkov     6       0.52%    2        3.85%
Ingo Molnar         5       0.44%    1        1.92%
H. Peter Anvin      3       0.26%    1        1.92%
Joe Perches         2       0.17%    1        1.92%
Rusty Russell       2       0.17%    1        1.92%
Greg Kroah-Hartman  1       0.09%    1        1.92%
Chris Wright        1       0.09%    1        1.92%
David Howells       1       0.09%    1        1.92%
Mel Gorman          1       0.09%    1        1.92%
Daniel Borkmann     1       0.09%    1        1.92%
Total               1148    100.00%  52       100.00%
Created with cregit.