cregit-Linux how code gets into the kernel

Release 4.11 arch/x86/mm/tlb.c

Directory: arch/x86/mm
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

/*
 *      Smarter SMP flushing macros.
 *              c/o Linus Torvalds.
 *
 *      These mean you can really definitely utterly forget about
 *      writing to user space from interrupts. (It's not allowed anyway).
 *
 *      Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *      More scalable flush, from Andi Kleen
 *
 *      Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

#ifdef CONFIG_SMP


/*
 * Argument block handed to the remote TLB flush callbacks in this file
 * (flush_tlb_func() and do_kernel_range_flush()).
 */
struct flush_tlb_info {
	/*
	 * mm the flush is for.  flush_tlb_func() returns without flushing
	 * when this is non-NULL and differs from the receiving CPU's
	 * cpu_tlbstate.active_mm.
	 */
	struct mm_struct *flush_mm;
	/* First virtual address of the range to flush. */
	unsigned long flush_start;
	/*
	 * End of the range (exclusive), or TLB_FLUSH_ALL to request a full
	 * flush instead of a per-page walk.
	 */
	unsigned long flush_end;
};

/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */

void leave_mm(int cpu) { struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); load_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally * uses RCU, so we have to call the tracepoint * specially here. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } }

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa3042.25%112.50%
Suresh B. Siddha2535.21%112.50%
Dave Hansen811.27%225.00%
Brian Gerst34.23%112.50%
Rusty Russell34.23%112.50%
Linus Torvalds11.41%112.50%
Alex Shi11.41%112.50%
Total71100.00%8100.00%

/* leave_mm() is exported (GPL-only) and exists only under CONFIG_SMP. */
EXPORT_SYMBOL_GPL(leave_mm);

#endif /* CONFIG_SMP */
/*
 * switch_mm - switch address spaces with local interrupts masked.
 * @prev: mm being switched away from.
 * @next: mm being switched to.
 * @tsk:  passed through unchanged to switch_mm_irqs_off().
 *
 * Thin wrapper: saves and disables local IRQs, delegates to
 * switch_mm_irqs_off(), then restores the saved IRQ state.
 */
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(irq_state);
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski43100.00%2100.00%
Total43100.00%2100.00%


/*
 * switch_mm_irqs_off - switch this CPU from @prev's page tables to @next's.
 * @prev: mm being switched away from.
 * @next: mm being switched to.
 * @tsk:  not referenced in this function body.
 *
 * switch_mm() calls this with local interrupts disabled.  The order of the
 * statements below is significant; see the ordering comment ahead of the
 * first load_cr3().
 */
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	unsigned cpu = smp_processor_id();

	if (likely(prev != next)) {
		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());

			pgd_t *pgd = next->pgd + stack_pgd_index;

			if (unlikely(pgd_none(*pgd)))
				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
		}

#ifdef CONFIG_SMP
		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
		this_cpu_write(cpu_tlbstate.active_mm, next);
#endif

		cpumask_set_cpu(cpu, mm_cpumask(next));

		/*
		 * Re-load page tables.
		 *
		 * This logic has an ordering constraint:
		 *
		 *  CPU 0: Write to a PTE for 'next'
		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
		 *  CPU 1: set bit 1 in next's mm_cpumask
		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
		 *
		 * We need to prevent an outcome in which CPU 1 observes
		 * the new PTE value and CPU 0 observes bit 1 clear in
		 * mm_cpumask.  (If that occurs, then the IPI will never
		 * be sent, and CPU 0's TLB will contain a stale entry.)
		 *
		 * The bad outcome can occur if either CPU's load is
		 * reordered before that CPU's store, so both CPUs must
		 * execute full barriers to prevent this from happening.
		 *
		 * Thus, switch_mm needs a full barrier between the
		 * store to mm_cpumask and any operation that could load
		 * from next->pgd.  TLB fills are special and can happen
		 * due to instruction fetches or for no reason at all,
		 * and neither LOCK nor MFENCE orders them.
		 * Fortunately, load_cr3() is serializing and gives the
		 * ordering guarantee we need.
		 *
		 */
		load_cr3(next->pgd);

		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);

		/* Stop flush ipis for the previous mm */
		cpumask_clear_cpu(cpu, mm_cpumask(prev));

		/* Load per-mm CR4 state */
		load_mm_cr4(next);

#ifdef CONFIG_MODIFY_LDT_SYSCALL
		/*
		 * Load the LDT, if the LDT is different.
		 *
		 * It's possible that prev->context.ldt doesn't match
		 * the LDT register.  This can happen if leave_mm(prev)
		 * was called and then modify_ldt changed
		 * prev->context.ldt but suppressed an IPI to this CPU.
		 * In this case, prev->context.ldt != NULL, because we
		 * never set context.ldt to NULL while the mm still
		 * exists.  That means that next->context.ldt !=
		 * prev->context.ldt, because mms never share an LDT.
		 */
		if (unlikely(prev->context.ldt != next->context.ldt))
			load_mm_ldt(next);
#endif
	}
#ifdef CONFIG_SMP
	  else {
		/* Same mm as before (prev == next): just mark it live again. */
		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);

		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
			/*
			 * On established mms, the mm_cpumask is only changed
			 * from irq context, from ptep_clear_flush() while in
			 * lazy tlb mode, and here.  Irqs are blocked during
			 * schedule, protecting us from simultaneous changes.
			 */
			cpumask_set_cpu(cpu, mm_cpumask(next));

			/*
			 * We were in lazy tlb mode and leave_mm disabled
			 * tlb flush IPI delivery.  We must reload CR3
			 * to make sure to use no freed page tables.
			 *
			 * As above, load_cr3() is serializing and orders TLB
			 * fills with respect to the mm_cpumask write.
			 */
			load_cr3(next->pgd);
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
			load_mm_cr4(next);
			load_mm_ldt(next);
		}
	}
#endif
}

Contributors

PersonTokensPropCommitsCommitProp
Andrew Lutomirski263100.00%3100.00%
Total263100.00%3100.00%

#ifdef CONFIG_SMP

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) set cpu_tlbstate to TLBSTATE_OK
 *	Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
 *	if cpu0 was in lazy tlb mode.
 * 1a2) update cpu active_mm
 *	Now cpu0 accepts tlb flushes for the new mm.
 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
 *	Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop ipi delivery for the old mm. This is not synchronized with
 *	the other cpus, but flush_tlb_func ignores flush ipis for the wrong
 *	mm, and in the worst case we perform a superfluous tlb flush.
 * 1b) thread switch without mm change
 *	cpu active_mm is correct, cpu0 already handles flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, ie current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu_tlbstate is local to each cpu, no
 * write/read ordering problems.
 */

/*
 * TLB flush function:
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 */
static void flush_tlb_func(void *info) { struct flush_tlb_info *f = info; inc_irq_stat(irq_tlb_count); if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) { local_flush_tlb(); trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); } else { unsigned long addr; unsigned long nr_pages = (f->flush_end - f->flush_start) / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); addr += PAGE_SIZE; } trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); } } else leave_mm(smp_processor_id()); }

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa4431.43%18.33%
Alex Shi4431.43%325.00%
Dave Hansen3625.71%325.00%
Tomoki Sekiyama53.57%18.33%
Brian Gerst53.57%18.33%
Nadav Amit42.86%18.33%
Mel Gorman10.71%18.33%
Tejun Heo10.71%18.33%
Total140100.00%12100.00%


void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end) { struct flush_tlb_info info; if (end == 0) end = start + PAGE_SIZE; info.flush_mm = mm; info.flush_start = start; info.flush_end = end; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (end == TLB_FLUSH_ALL) trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); else trace_tlb_flush(TLB_REMOTE_SEND_IPI, (end - start) >> PAGE_SHIFT); if (is_uv_system()) { unsigned int cpu; cpu = smp_processor_id(); cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu); if (cpumask) smp_call_function_many(cpumask, flush_tlb_func, &info, 1); return; } smp_call_function_many(cpumask, flush_tlb_func, &info, 1); }

Contributors

PersonTokensPropCommitsCommitProp
Alex Shi3422.67%215.38%
Nadav Amit3020.00%17.69%
Rusty Russell2617.33%17.69%
Glauber de Oliveira Costa2315.33%17.69%
Tejun Heo1510.00%17.69%
Mel Gorman106.67%215.38%
David Shaohua Li42.67%17.69%
Dave Hansen42.67%17.69%
Linus Torvalds21.33%17.69%
Mike Travis10.67%17.69%
Xiao Guangrong10.67%17.69%
Total150100.00%13100.00%


void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); }

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa3856.72%112.50%
Rusty Russell1420.90%225.00%
Dave Hansen1116.42%225.00%
Alex Shi22.99%112.50%
Mel Gorman11.49%112.50%
Andrew Lutomirski11.49%112.50%
Total67100.00%8100.00%

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 *
 * Ranges larger than this are handled with a full flush rather than
 * one flush per page (see flush_tlb_mm_range() and
 * flush_tlb_kernel_range()).
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { unsigned long addr; /* do a global flush by default */ unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); if (current->active_mm != mm) { /* Synchronize with switch_mm. */ smp_mb(); goto out; } if (!current->mm) { leave_mm(smp_processor_id()); /* Synchronize with switch_mm. */ smp_mb(); goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; /* * Both branches below are implicit full barriers (MOV to CR or * INVLPG) that synchronize with switch_mm. */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } } trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); out: if (base_pages_to_flush == TLB_FLUSH_ALL) { start = 0UL; end = TLB_FLUSH_ALL; } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, start, end); preempt_enable(); }

Contributors

PersonTokensPropCommitsCommitProp
Alex Shi12560.10%323.08%
Dave Hansen5626.92%538.46%
Andrew Lutomirski115.29%17.69%
Mel Gorman94.33%215.38%
Glauber de Oliveira Costa62.88%17.69%
JoonSoo Kim10.48%17.69%
Total208100.00%13100.00%


void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) { struct mm_struct *mm = vma->vm_mm; preempt_disable(); if (current->active_mm == mm) { if (current->mm) { /* * Implicit full barrier (INVLPG) that synchronizes * with switch_mm. */ __flush_tlb_one(start); } else { leave_mm(smp_processor_id()); /* Synchronize with switch_mm. */ smp_mb(); } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); preempt_enable(); }

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa6770.53%120.00%
Rusty Russell1414.74%240.00%
Andrew Lutomirski99.47%120.00%
Alex Shi55.26%120.00%
Total95100.00%5100.00%


/*
 * do_flush_tlb_all - per-CPU callback behind flush_tlb_all().
 * @info: unused.
 *
 * Flushes the whole TLB on this CPU and, if the CPU is in TLBSTATE_LAZY,
 * additionally leaves the mm.
 */
static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();

	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_LAZY)
		return;

	leave_mm(smp_processor_id());
}

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa2468.57%116.67%
Dave Hansen411.43%116.67%
Brian Gerst38.57%116.67%
Borislav Petkov25.71%116.67%
Alex Shi12.86%116.67%
Mel Gorman12.86%116.67%
Total35100.00%6100.00%


/*
 * flush_tlb_all - run do_flush_tlb_all() on every CPU.
 *
 * Accounts one NR_TLB_REMOTE_FLUSH event and then invokes the callback
 * through on_each_cpu() with a final argument of 1.
 */
void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);

	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

Contributors

PersonTokensPropCommitsCommitProp
Glauber de Oliveira Costa1676.19%133.33%
Dave Hansen419.05%133.33%
Mel Gorman14.76%133.33%
Total21100.00%3100.00%


/*
 * do_kernel_range_flush - per-CPU callback for flush_tlb_kernel_range().
 * @info: pointer to a struct flush_tlb_info; only flush_start and
 *        flush_end are read.
 *
 * Invalidates [flush_start, flush_end) one page at a time.
 */
static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *range = info;
	unsigned long va = range->flush_start;

	/* One 'invlpg' per page in the range. */
	while (va < range->flush_end) {
		__flush_tlb_single(va);
		va += PAGE_SIZE;
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Alex Shi45100.00%1100.00%
Total45100.00%1100.00%


/*
 * flush_tlb_kernel_range - flush kernel translations in [@start, @end)
 * on every CPU.
 * @start: first virtual address of the range.
 * @end:   end of the range, or TLB_FLUSH_ALL.
 *
 * Ranges up to tlb_single_page_flush_ceiling pages are flushed page by
 * page on each CPU; anything larger, or TLB_FLUSH_ALL, becomes a full flush.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* Balance as user space task's flush, a bit conservative */
	if (end != TLB_FLUSH_ALL &&
	    (end - start) <= tlb_single_page_flush_ceiling * PAGE_SIZE) {
		struct flush_tlb_info info;

		/* flush_mm is left unset: do_kernel_range_flush() never reads it. */
		info.flush_start = start;
		info.flush_end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	} else {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Alex Shi6187.14%150.00%
Dave Hansen912.86%150.00%
Total70100.00%2100.00%


/*
 * tlbflush_read_file - debugfs read handler for the flush ceiling.
 * @file:     unused.
 * @user_buf: destination buffer in user space.
 * @count:    size of @user_buf.
 * @ppos:     file position, advanced by simple_read_from_buffer().
 *
 * Formats tlb_single_page_flush_ceiling as a decimal number followed by a
 * newline and hands it to simple_read_from_buffer().
 *
 * Return: whatever simple_read_from_buffer() returns.
 */
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
				  size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	/*
	 * The ceiling is an unsigned long, so it must be printed with %lu
	 * (not %ld).  The write is bounded by the buffer size; 32 bytes is
	 * ample for a 64-bit value plus newline and terminator.
	 */
	len = snprintf(buf, sizeof(buf), "%lu\n",
		       tlb_single_page_flush_ceiling);

	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen58100.00%1100.00%
Total58100.00%1100.00%


/*
 * tlbflush_write_file - debugfs write handler for the flush ceiling.
 * @file:     unused.
 * @user_buf: user-space buffer holding the new value as text.
 * @count:    number of bytes offered by the writer.
 * @ppos:     unused.
 *
 * At most sizeof(buffer) - 1 bytes are copied in and parsed with
 * kstrtoint() (base 0).  Negative values are rejected.
 *
 * Return: @count on success, -EFAULT if the copy from user space fails,
 * -EINVAL if the text does not parse or the value is negative.
 */
static ssize_t tlbflush_write_file(struct file *file,
				   const char __user *user_buf,
				   size_t count, loff_t *ppos)
{
	char kbuf[32];
	ssize_t nbytes;
	int value;

	/* Leave room for the terminating NUL. */
	nbytes = min(count, sizeof(kbuf) - 1);
	if (copy_from_user(kbuf, user_buf, nbytes))
		return -EFAULT;
	kbuf[nbytes] = '\0';

	/* Short-circuit keeps 'value' from being read when parsing failed. */
	if (kstrtoint(kbuf, 0, &value) || value < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = value;

	return count;
}

Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen105100.00%1100.00%
Total105100.00%1100.00%

/*
 * File operations for the "tlb_single_page_flush_ceiling" debugfs file
 * registered by create_tlb_single_page_flush_ceiling().
 */
static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};
/*
 * create_tlb_single_page_flush_ceiling - register the debugfs tunable.
 *
 * Creates "tlb_single_page_flush_ceiling" under arch_debugfs_dir with
 * owner read/write permission, backed by fops_tlbflush.  The result of
 * debugfs_create_file() is not checked.
 *
 * Return: always 0.
 */
static int __init create_tlb_single_page_flush_ceiling(void)
{
	const char *knob = "tlb_single_page_flush_ceiling";

	debugfs_create_file(knob, S_IRUSR | S_IWUSR, arch_debugfs_dir,
			    NULL, &fops_tlbflush);

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen28100.00%1100.00%
Total28100.00%1100.00%

/* Register the debugfs file via the late_initcall hook. */
late_initcall(create_tlb_single_page_flush_ceiling);

#endif /* CONFIG_SMP */

Overall Contributors

PersonTokensPropCommitsCommitProp
Dave Hansen35823.65%920.00%
Andrew Lutomirski33922.39%511.11%
Alex Shi33021.80%715.56%
Glauber de Oliveira Costa29319.35%24.44%
Rusty Russell573.76%24.44%
Nadav Amit342.25%24.44%
Suresh B. Siddha251.65%12.22%
Mel Gorman231.52%36.67%
Tejun Heo181.19%24.44%
Brian Gerst110.73%12.22%
David Shaohua Li70.46%12.22%
Tomoki Sekiyama50.33%12.22%
Linus Torvalds30.20%24.44%
Jan Beulich30.20%12.22%
Borislav Petkov20.13%12.22%
Jeremiah Mahler20.13%12.22%
JoonSoo Kim10.07%12.22%
Mike Travis10.07%12.22%
Xiao Guangrong10.07%12.22%
Paul Gortmaker10.07%12.22%
Total1514100.00%45100.00%
Directory: arch/x86/mm
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.