arch/x86/mm/tlb.c (Release 4.11)
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (It's not allowed anyway.)
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*
* More scalable flush, from Andi Kleen
*
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
#ifdef CONFIG_SMP
struct flush_tlb_info {
struct mm_struct *flush_mm;
unsigned long flush_start;
unsigned long flush_end;
};
/*
* We cannot call mmdrop() because we are in interrupt context;
* instead, we just update mm->cpu_vm_mask.
*/
void leave_mm(int cpu)
{
struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
load_cr3(swapper_pg_dir);
/*
* This gets called in the idle path where RCU
* functions differently. Tracing normally
* uses RCU, so we have to call the tracepoint
* specially here.
*/
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
}
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 30 | 42.25% | 1 | 12.50% |
Suresh B. Siddha | 25 | 35.21% | 1 | 12.50% |
Dave Hansen | 8 | 11.27% | 2 | 25.00% |
Brian Gerst | 3 | 4.23% | 1 | 12.50% |
Rusty Russell | 3 | 4.23% | 1 | 12.50% |
Linus Torvalds | 1 | 1.41% | 1 | 12.50% |
Alex Shi | 1 | 1.41% | 1 | 12.50% |
Total | 71 | 100.00% | 8 | 100.00% |
EXPORT_SYMBOL_GPL(leave_mm);
#endif /* CONFIG_SMP */
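Editorial note: the TLBSTATE_* values tested throughout this file, together with the per-CPU active_mm/state pair, are defined in <asm/tlbflush.h>. The sketch below is a simplified, illustrative rendering of that per-CPU state (not the kernel's verbatim definitions); it spells out why leave_mm() above BUGs if called while the CPU is still in TLBSTATE_OK: only lazily attached CPUs may detach.

/*
 * Editorial sketch, not the kernel's verbatim definitions: the per-CPU
 * TLB state consulted by leave_mm() and flush_tlb_func(). The real
 * declarations live in <asm/tlbflush.h>.
 */
struct mm_struct;			/* defined elsewhere in the kernel */

#define TLBSTATE_OK	1		/* CPU is actively running with active_mm */
#define TLBSTATE_LAZY	2		/* CPU kept the old page tables only to
					 * avoid a CR3 reload; a flush IPI may
					 * detach it via leave_mm() instead of
					 * actually flushing */

struct tlb_state_sketch {
	struct mm_struct *active_mm;	/* mm whose page tables are loaded */
	int state;			/* TLBSTATE_OK or TLBSTATE_LAZY */
};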
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned long flags;
local_irq_save(flags);
switch_mm_irqs_off(prev, next, tsk);
local_irq_restore(flags);
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Andrew Lutomirski | 43 | 100.00% | 2 | 100.00% |
Total | 43 | 100.00% | 2 | 100.00% |
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
* If our current stack is in vmalloc space and isn't
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
pgd_t *pgd = next->pgd + stack_pgd_index;
if (unlikely(pgd_none(*pgd)))
set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
}
#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* Re-load page tables.
*
* This logic has an ordering constraint:
*
* CPU 0: Write to a PTE for 'next'
* CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
* CPU 1: set bit 1 in next's mm_cpumask
* CPU 1: load from the PTE that CPU 0 writes (implicit)
*
* We need to prevent an outcome in which CPU 1 observes
* the new PTE value and CPU 0 observes bit 1 clear in
* mm_cpumask. (If that occurs, then the IPI will never
* be sent, and CPU 0's TLB will contain a stale entry.)
*
* The bad outcome can occur if either CPU's load is
* reordered before that CPU's store, so both CPUs must
* execute full barriers to prevent this from happening.
*
* Thus, switch_mm needs a full barrier between the
* store to mm_cpumask and any operation that could load
* from next->pgd. TLB fills are special and can happen
* due to instruction fetches or for no reason at all,
* and neither LOCK nor MFENCE orders them.
* Fortunately, load_cr3() is serializing and gives the
* ordering guarantee we need.
*
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
/* Load per-mm CR4 state */
load_mm_cr4(next);
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* Load the LDT, if the LDT is different.
*
* It's possible that prev->context.ldt doesn't match
* the LDT register. This can happen if leave_mm(prev)
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never set context.ldt to NULL while the mm still
* exists. That means that next->context.ldt !=
* prev->context.ldt, because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
#endif
}
#ifdef CONFIG_SMP
else {
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
/*
* On established mms, the mm_cpumask is only changed
* from irq context, from ptep_clear_flush() while in
* lazy tlb mode, and here. Irqs are blocked during
* schedule, protecting us from simultaneous changes.
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
/*
* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);
}
}
#endif
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Andrew Lutomirski | 263 | 100.00% | 3 | 100.00% |
Total | 263 | 100.00% | 3 | 100.00% |
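The ordering comment in switch_mm_irqs_off() describes the classic store-buffering hazard. The following editorial sketch is a user-space litmus test using C11 atomics; all names are invented here, and the fences merely stand in for load_cr3()'s serializing effect and for the full barrier the flush side must provide. It illustrates the invariant the kernel relies on: with both fences in place, cpu0_side() and cpu1_side() can never both return 0, so the flush IPI cannot be missed. Dropping either fence reintroduces the bad outcome described above.

/*
 * Minimal store-buffering litmus test, assuming C11 atomics. The fence
 * in cpu1_side() models the serializing effect of load_cr3(); the fence
 * in cpu0_side() models the full barrier on the flush side.
 */
#include <stdatomic.h>

static atomic_int pte_published;	/* stands in for the new PTE value      */
static atomic_int cpu1_in_mask;		/* stands in for CPU 1's mm_cpumask bit */

static int cpu0_side(void)		/* CPU 0: writes the PTE, then decides
					 * whether to send a flush IPI          */
{
	atomic_store_explicit(&pte_published, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* full barrier */
	/* Returns 0 only if CPU 1 is not yet visible in the mask (no IPI). */
	return atomic_load_explicit(&cpu1_in_mask, memory_order_relaxed);
}

static int cpu1_side(void)		/* CPU 1: joins the mm, then "fills"
					 * its TLB from the page tables         */
{
	atomic_store_explicit(&cpu1_in_mask, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* load_cr3() plays this role */
	/* Returns 0 only if the old (stale) PTE value was observed. */
	return atomic_load_explicit(&pte_published, memory_order_relaxed);
}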
#ifdef CONFIG_SMP
/*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) set cpu_tlbstate to TLBSTATE_OK
* Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
* if cpu0 was in lazy tlb mode.
* 1a2) update cpu active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but flush_tlb_func ignores flush IPIs for the wrong
* mm, and in the worst case we perform a superfluous tlb flush.
* 1b) thread switch without mm change
* cpu active_mm is correct, cpu0 already handles flush ipis.
* 1b1) set cpu_tlbstate to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %esp, i.e. current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu_tlbstate is local to each cpu, so there
* are no write/read ordering problems.
*/
/*
* TLB flush function:
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
static void flush_tlb_func(void *info)
{
struct flush_tlb_info *f = info;
inc_irq_stat(irq_tlb_count);
if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
return;
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_end == TLB_FLUSH_ALL) {
local_flush_tlb();
trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
} else {
unsigned long addr;
unsigned long nr_pages =
(f->flush_end - f->flush_start) / PAGE_SIZE;
addr = f->flush_start;
while (addr < f->flush_end) {
__flush_tlb_single(addr);
addr += PAGE_SIZE;
}
trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
}
} else
leave_mm(smp_processor_id());
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 44 | 31.43% | 1 | 8.33% |
Alex Shi | 44 | 31.43% | 3 | 25.00% |
Dave Hansen | 36 | 25.71% | 3 | 25.00% |
Tomoki Sekiyama | 5 | 3.57% | 1 | 8.33% |
Brian Gerst | 5 | 3.57% | 1 | 8.33% |
Nadav Amit | 4 | 2.86% | 1 | 8.33% |
Mel Gorman | 1 | 0.71% | 1 | 8.33% |
Tejun Heo | 1 | 0.71% | 1 | 8.33% |
Total | 140 | 100.00% | 12 | 100.00% |
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long start,
unsigned long end)
{
struct flush_tlb_info info;
if (end == 0)
end = start + PAGE_SIZE;
info.flush_mm = mm;
info.flush_start = start;
info.flush_end = end;
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
if (end == TLB_FLUSH_ALL)
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
else
trace_tlb_flush(TLB_REMOTE_SEND_IPI,
(end - start) >> PAGE_SHIFT);
if (is_uv_system()) {
unsigned int cpu;
cpu = smp_processor_id();
cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
if (cpumask)
smp_call_function_many(cpumask, flush_tlb_func,
&info, 1);
return;
}
smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Alex Shi | 34 | 22.67% | 2 | 15.38% |
Nadav Amit | 30 | 20.00% | 1 | 7.69% |
Rusty Russell | 26 | 17.33% | 1 | 7.69% |
Glauber de Oliveira Costa | 23 | 15.33% | 1 | 7.69% |
Tejun Heo | 15 | 10.00% | 1 | 7.69% |
Mel Gorman | 10 | 6.67% | 2 | 15.38% |
David Shaohua Li | 4 | 2.67% | 1 | 7.69% |
Dave Hansen | 4 | 2.67% | 1 | 7.69% |
Linus Torvalds | 2 | 1.33% | 1 | 7.69% |
Mike Travis | 1 | 0.67% | 1 | 7.69% |
Xiao Guangrong | 1 | 0.67% | 1 | 7.69% |
Total | 150 | 100.00% | 13 | 100.00% |
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
preempt_disable();
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
/* This is an implicit full barrier that synchronizes with switch_mm. */
local_flush_tlb();
trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
preempt_enable();
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 38 | 56.72% | 1 | 12.50% |
Rusty Russell | 14 | 20.90% | 2 | 25.00% |
Dave Hansen | 11 | 16.42% | 2 | 25.00% |
Alex Shi | 2 | 2.99% | 1 | 12.50% |
Mel Gorman | 1 | 1.49% | 1 | 12.50% |
Andrew Lutomirski | 1 | 1.49% | 1 | 12.50% |
Total | 67 | 100.00% | 8 | 100.00% |
/*
* See Documentation/x86/tlb.txt for details. We choose 33
* because it is large enough to cover the vast majority (at
* least 95%) of allocations, and is small enough that we are
* confident it will not cause too much overhead. Each single
* flush is about 100 ns, so this caps the maximum overhead at
* _about_ 3,300 ns (33 single-page flushes at roughly 100 ns each).
*
* This is in units of pages.
*/
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
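As an editorial illustration of the cost model in the comment above (not kernel code; the ~100 ns per INVLPG figure is the comment's own estimate), the helpers below mirror the decision flush_tlb_mm_range() makes: once a range exceeds the ceiling, one full flush is assumed to be cheaper than flushing page by page.

/* Editorial sketch of the ceiling's cost model; not part of the kernel. */
#include <stdbool.h>

#define SKETCH_NS_PER_INVLPG	100UL	/* assumed cost of one single-page flush */

/* Mirrors flush_tlb_mm_range(): above the ceiling, fall back to a full flush. */
static bool sketch_should_flush_all(unsigned long pages, unsigned long ceiling)
{
	return pages > ceiling;
}

/* With the default ceiling of 33, the per-range cost is capped at
 * roughly 33 * 100 ns = 3,300 ns before a full flush takes over. */
static unsigned long sketch_range_flush_cost_ns(unsigned long pages)
{
	return pages * SKETCH_NS_PER_INVLPG;
}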
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag)
{
unsigned long addr;
/* do a global flush by default */
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
preempt_disable();
if (current->active_mm != mm) {
/* Synchronize with switch_mm. */
smp_mb();
goto out;
}
if (!current->mm) {
leave_mm(smp_processor_id());
/* Synchronize with switch_mm. */
smp_mb();
goto out;
}
if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
base_pages_to_flush = (end - start) >> PAGE_SHIFT;
/*
* Both branches below are implicit full barriers (MOV to CR or
* INVLPG) that synchronize with switch_mm.
*/
if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
base_pages_to_flush = TLB_FLUSH_ALL;
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
local_flush_tlb();
} else {
/* flush the range one page at a time with 'invlpg' */
for (addr = start; addr < end; addr += PAGE_SIZE) {
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
}
}
trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
out:
if (base_pages_to_flush == TLB_FLUSH_ALL) {
start = 0UL;
end = TLB_FLUSH_ALL;
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, start, end);
preempt_enable();
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Alex Shi | 125 | 60.10% | 3 | 23.08% |
Dave Hansen | 56 | 26.92% | 5 | 38.46% |
Andrew Lutomirski | 11 | 5.29% | 1 | 7.69% |
Mel Gorman | 9 | 4.33% | 2 | 15.38% |
Glauber de Oliveira Costa | 6 | 2.88% | 1 | 7.69% |
JoonSoo Kim | 1 | 0.48% | 1 | 7.69% |
Total | 208 | 100.00% | 13 | 100.00% |
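For context on how generic mm code reaches flush_tlb_mm_range(): in kernels of this vintage the arch's flush_tlb_range() was a thin macro in <asm/tlbflush.h>, roughly as sketched below (reproduced from memory and simplified; consult the header of the exact release before relying on it). The vm_flags argument is what lets flush_tlb_mm_range() skip per-page flushes for VM_HUGETLB ranges.

/*
 * Approximate shape of the x86 flush_tlb_range() wrapper of this era
 * (simplified; the authoritative definition is in <asm/tlbflush.h>).
 */
#define flush_tlb_range(vma, start, end)	\
	flush_tlb_mm_range((vma)->vm_mm, start, end, (vma)->vm_flags)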
void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
{
struct mm_struct *mm = vma->vm_mm;
preempt_disable();
if (current->active_mm == mm) {
if (current->mm) {
/*
* Implicit full barrier (INVLPG) that synchronizes
* with switch_mm.
*/
__flush_tlb_one(start);
} else {
leave_mm(smp_processor_id());
/* Synchronize with switch_mm. */
smp_mb();
}
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
preempt_enable();
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 67 | 70.53% | 1 | 20.00% |
Rusty Russell | 14 | 14.74% | 2 | 40.00% |
Andrew Lutomirski | 9 | 9.47% | 1 | 20.00% |
Alex Shi | 5 | 5.26% | 1 | 20.00% |
Total | 95 | 100.00% | 5 | 100.00% |
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
__flush_tlb_all();
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(smp_processor_id());
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 24 | 68.57% | 1 | 16.67% |
Dave Hansen | 4 | 11.43% | 1 | 16.67% |
Brian Gerst | 3 | 8.57% | 1 | 16.67% |
Borislav Petkov | 2 | 5.71% | 1 | 16.67% |
Alex Shi | 1 | 2.86% | 1 | 16.67% |
Mel Gorman | 1 | 2.86% | 1 | 16.67% |
Total | 35 | 100.00% | 6 | 100.00% |
void flush_tlb_all(void)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Glauber de Oliveira Costa | 16 | 76.19% | 1 | 33.33% |
Dave Hansen | 4 | 19.05% | 1 | 33.33% |
Mel Gorman | 1 | 4.76% | 1 | 33.33% |
Total | 21 | 100.00% | 3 | 100.00% |
static void do_kernel_range_flush(void *info)
{
struct flush_tlb_info *f = info;
unsigned long addr;
/* flush the range one page at a time with 'invlpg' */
for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
__flush_tlb_single(addr);
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Alex Shi | 45 | 100.00% | 1 | 100.00% |
Total | 45 | 100.00% | 1 | 100.00% |
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
/* Balance against a user-space task's flush; be a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
on_each_cpu(do_flush_tlb_all, NULL, 1);
} else {
struct flush_tlb_info info;
info.flush_start = start;
info.flush_end = end;
on_each_cpu(do_kernel_range_flush, &info, 1);
}
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Alex Shi | 61 | 87.14% | 1 | 50.00% |
Dave Hansen | 9 | 12.86% | 1 | 50.00% |
Total | 70 | 100.00% | 2 | 100.00% |
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[32];
unsigned int len;
len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Dave Hansen | 58 | 100.00% | 1 | 100.00% |
Total | 58 | 100.00% | 1 | 100.00% |
static ssize_t tlbflush_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
ssize_t len;
int ceiling;
len = min(count, sizeof(buf) - 1);
if (copy_from_user(buf, user_buf, len))
return -EFAULT;
buf[len] = '\0';
if (kstrtoint(buf, 0, &ceiling))
return -EINVAL;
if (ceiling < 0)
return -EINVAL;
tlb_single_page_flush_ceiling = ceiling;
return count;
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Dave Hansen | 105 | 100.00% | 1 | 100.00% |
Total | 105 | 100.00% | 1 | 100.00% |
static const struct file_operations fops_tlbflush = {
.read = tlbflush_read_file,
.write = tlbflush_write_file,
.llseek = default_llseek,
};
static int __init create_tlb_single_page_flush_ceiling(void)
{
debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
arch_debugfs_dir, NULL, &fops_tlbflush);
return 0;
}
Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Dave Hansen | 28 | 100.00% | 1 | 100.00% |
Total | 28 | 100.00% | 1 | 100.00% |
late_initcall(create_tlb_single_page_flush_ceiling);
#endif /* CONFIG_SMP */
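Editorial usage note: the ceiling is tunable at run time through the debugfs file created above. The sketch below assumes debugfs is mounted at /sys/kernel/debug and that arch_debugfs_dir corresponds to the x86 subdirectory (the usual layout, but verify on your system); root privileges are needed since the file mode is S_IRUSR | S_IWUSR. Documentation/x86/tlb.txt, referenced earlier, discusses how to choose a value.

/*
 * Editorial sketch: reading and updating tlb_single_page_flush_ceiling
 * from user space. The path is an assumption about the usual debugfs
 * layout; adjust if your system differs.
 */
#include <stdio.h>

#define CEILING_PATH \
	"/sys/kernel/debug/x86/tlb_single_page_flush_ceiling"

int main(void)
{
	unsigned long ceiling = 0;
	FILE *f = fopen(CEILING_PATH, "r+");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%lu", &ceiling) == 1)
		printf("current ceiling: %lu pages\n", ceiling);

	/* Example: raise the ceiling to 64 pages (tlbflush_write_file
	 * accepts any non-negative integer). */
	rewind(f);
	fprintf(f, "64\n");
	fclose(f);
	return 0;
}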
Overall Contributors
Person | Tokens | Token Share | Commits | Commit Share |
Dave Hansen | 358 | 23.65% | 9 | 20.00% |
Andrew Lutomirski | 339 | 22.39% | 5 | 11.11% |
Alex Shi | 330 | 21.80% | 7 | 15.56% |
Glauber de Oliveira Costa | 293 | 19.35% | 2 | 4.44% |
Rusty Russell | 57 | 3.76% | 2 | 4.44% |
Nadav Amit | 34 | 2.25% | 2 | 4.44% |
Suresh B. Siddha | 25 | 1.65% | 1 | 2.22% |
Mel Gorman | 23 | 1.52% | 3 | 6.67% |
Tejun Heo | 18 | 1.19% | 2 | 4.44% |
Brian Gerst | 11 | 0.73% | 1 | 2.22% |
David Shaohua Li | 7 | 0.46% | 1 | 2.22% |
Tomoki Sekiyama | 5 | 0.33% | 1 | 2.22% |
Linus Torvalds | 3 | 0.20% | 2 | 4.44% |
Jan Beulich | 3 | 0.20% | 1 | 2.22% |
Borislav Petkov | 2 | 0.13% | 1 | 2.22% |
Jeremiah Mahler | 2 | 0.13% | 1 | 2.22% |
JoonSoo Kim | 1 | 0.07% | 1 | 2.22% |
Mike Travis | 1 | 0.07% | 1 | 2.22% |
Xiao Guangrong | 1 | 0.07% | 1 | 2.22% |
Paul Gortmaker | 1 | 0.07% | 1 | 2.22% |
Total | 1514 | 100.00% | 45 | 100.00% |