cregit-Linux how code gets into the kernel

Release 4.14 arch/x86/kernel/setup_percpu.c

Directory: arch/x86/kernel
// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/pfn.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
#include <asm/proto.h>
#include <asm/cpumask.h>
#include <asm/cpu.h>
#include <asm/stackprotector.h>

/*
 * Per-CPU int.  setup_per_cpu_areas() stores each possible CPU's index
 * into that CPU's copy.
 */
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);

EXPORT_PER_CPU_SYMBOL(cpu_number);

/*
 * Initial value used for this_cpu_off and for every slot of
 * __per_cpu_offset[] below: the address of __per_cpu_load on x86_64,
 * zero otherwise.
 */
#ifdef CONFIG_X86_64

#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else

#define BOOT_PERCPU_OFFSET 0
#endif

/*
 * Per-CPU copy of the CPU's own percpu offset.  Starts out as
 * BOOT_PERCPU_OFFSET; setup_per_cpu_areas() overwrites each CPU's copy
 * with per_cpu_offset(cpu).
 */
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;

EXPORT_PER_CPU_SYMBOL(this_cpu_off);


/*
 * One offset per CPU (NR_CPUS entries), all initialised to
 * BOOT_PERCPU_OFFSET and marked __ro_after_init.
 * NOTE(review): presumably the storage behind per_cpu_offset(cpu), which
 * setup_per_cpu_areas() assigns to -- confirm against the percpu headers.
 */
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};

EXPORT_SYMBOL(__per_cpu_offset);

/*
 * On x86_64 symbols referenced from code should be reachable using
 * 32bit relocations.  Reserve space for static percpu variables in
 * modules so that they are always served from the first chunk which
 * is located at the percpu segment base.  On x86_32, anything can
 * address anywhere.  No need to reserve space in the first chunk.
 *
 * PERCPU_FIRST_CHUNK_RESERVE is passed as the first argument to both
 * pcpu_embed_first_chunk() and pcpu_page_first_chunk() in
 * setup_per_cpu_areas(), and is subtracted when computing the dynamic
 * size for the embedding allocator there.
 */
#ifdef CONFIG_X86_64

#define PERCPU_FIRST_CHUNK_RESERVE	PERCPU_MODULE_RESERVE
#else

#define PERCPU_FIRST_CHUNK_RESERVE	0
#endif

#ifdef CONFIG_X86_32
/**
 * pcpu_need_numa - decide whether percpu allocation must be NUMA aware
 *
 * Without CONFIG_NEED_MULTIPLE_NODES the answer is always "no".
 * Otherwise every possible CPU is visited in order, remembering the
 * pg_data_t of the node the previous CPU belongs to.  NUMA matters as
 * soon as a CPU sits on an online node whose pg_data_t is non-NULL and
 * differs from the (non-NULL) one remembered from the previous CPU.
 *
 * RETURNS:
 * true if NUMA should be considered; otherwise, false.
 */
static bool __init pcpu_need_numa(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	pg_data_t *prev_pgdat = NULL;
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		const int nid = early_cpu_to_node(cpu);
		pg_data_t *pgdat = NODE_DATA(nid);

		if (node_online(nid) && pgdat &&
		    prev_pgdat && prev_pgdat != pgdat)
			return true;

		/* remembered unconditionally, even if offline or NULL */
		prev_pgdat = pgdat;
	}
#endif
	return false;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 72 | 100.00% | 1 | 100.00%
Total | 72 | 100.00% | 1 | 100.00%

#endif /* CONFIG_X86_32 */

/**
 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
 * @cpu: cpu to allocate for
 * @size: size allocation in bytes
 * @align: alignment
 *
 * Allocate @size bytes aligned at @align for cpu @cpu, using
 * __pa(MAX_DMA_ADDRESS) as the allocation goal.  With
 * CONFIG_NEED_MULTIPLE_NODES the memory is requested from the CPU's
 * node when that node is online and has node data; otherwise (and on
 * non-NUMA builds) the node-agnostic bootmem allocator is used.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
					unsigned long align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	const int nid = early_cpu_to_node(cpu);
	void *area;

	if (node_online(nid) && NODE_DATA(nid)) {
		/* usable node: ask for node-local memory */
		area = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
						    size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
			 cpu, size, nid, __pa(area));
		return area;
	}

	/* node offline or without node data: allocate from anywhere */
	area = __alloc_bootmem_nopanic(size, align, goal);
	pr_info("cpu %d has no node %d or node-local memory\n",
		cpu, nid);
	pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
		 cpu, size, __pa(area));
	return area;
#else
	return __alloc_bootmem_nopanic(size, align, goal);
#endif
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 146 | 99.32% | 1 | 50.00%
Joe Perches | 1 | 0.68% | 1 | 50.00%
Total | 147 | 100.00% | 2 | 100.00%

/*
 * Helpers for first chunk memory allocation
 */

/*
 * Allocation callback handed to the first-chunk allocators in
 * setup_per_cpu_areas(); forwards to pcpu_alloc_bootmem() and returns
 * its result unchanged.
 */
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	void *area = pcpu_alloc_bootmem(cpu, size, align);

	return area;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 28 | 100.00% | 2 | 100.00%
Total | 28 | 100.00% | 2 | 100.00%


/*
 * Free callback handed to the first-chunk allocators in
 * setup_per_cpu_areas(); gives @size bytes starting at the physical
 * address of @ptr back to bootmem.
 */
static void __init pcpu_fc_free(void *ptr, size_t size)
{
	const unsigned long phys = __pa(ptr);

	free_bootmem(phys, size);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 24 | 100.00% | 1 | 100.00%
Total | 24 | 100.00% | 1 | 100.00%


/*
 * Distance callback for pcpu_embed_first_chunk().  With
 * CONFIG_NEED_MULTIPLE_NODES two CPUs are LOCAL_DISTANCE apart when
 * early_cpu_to_node() maps them to the same node and REMOTE_DISTANCE
 * apart otherwise; without it every pair is LOCAL_DISTANCE.
 */
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	const int from_nid = early_cpu_to_node(from);
	const int to_nid = early_cpu_to_node(to);

	return from_nid == to_nid ? LOCAL_DISTANCE : REMOTE_DISTANCE;
#else
	return LOCAL_DISTANCE;
#endif
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 44 | 100.00% | 4 | 100.00%
Total | 44 | 100.00% | 4 | 100.00%


/*
 * PTE population callback handed to pcpu_page_first_chunk() in
 * setup_per_cpu_areas(); simply forwards @addr to populate_extra_pte().
 */
static void __init pcpup_populate_pte(unsigned long addr)
{
	populate_extra_pte(addr);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 16 | 100.00% | 2 | 100.00%
Total | 16 | 100.00% | 2 | 100.00%


/*
 * On 32-bit builds, write the GDT_ENTRY_PERCPU descriptor of @cpu's GDT:
 * base is per_cpu_offset(@cpu), limit 0xFFFFF, flags 0x8092.  On other
 * builds this is a no-op.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
	struct desc_struct percpu_desc = GDT_ENTRY_INIT(0x8092,
							per_cpu_offset(cpu),
							0xFFFFF);
	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);

	write_gdt_entry(gdt, GDT_ENTRY_PERCPU, &percpu_desc, DESCTYPE_S);
#endif
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 25 | 54.35% | 1 | 20.00%
Glauber de Oliveira Costa | 7 | 15.22% | 1 | 20.00%
Yinghai Lu | 7 | 15.22% | 1 | 20.00%
Thomas Gleixner | 6 | 13.04% | 1 | 20.00%
Thomas Garnier | 1 | 2.17% | 1 | 20.00%
Total | 46 | 100.00% | 5 | 100.00%


/*
 * setup_per_cpu_areas - set up the first percpu chunk and per-CPU state
 *
 * Picks a first-chunk allocator (embedding first, page-based as the
 * fallback or when explicitly chosen), panics if neither works, then for
 * every possible CPU records its percpu offset, seeds the early per-CPU
 * variables from their boot-time arrays and finally retires those arrays.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned long base_delta;
	unsigned int cpu;
	int err = -EINVAL;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * The embedding allocator is preferred.  On NUMA configurations,
	 * however, it can produce a very sparse unit mapping, and the
	 * vmalloc area is not spacious enough for that on 32bit, so fall
	 * back to the page allocator there.
	 */
#ifdef CONFIG_X86_32
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		/*
		 * 64bit uses PMD_SIZE as the atom size so embedded percpu
		 * areas are PMD aligned, which may later allow PMD mappings
		 * in the vmalloc area.  32bit sticks to PAGE_SIZE because
		 * vmalloc space is highly contended there and large vmalloc
		 * allocations can easily fail.
		 */
#ifdef CONFIG_X86_64
		const size_t atom_size = PMD_SIZE;
#else
		const size_t atom_size = PAGE_SIZE;
#endif
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
					PERCPU_DYNAMIC_RESERVE -
					PERCPU_FIRST_CHUNK_RESERVE;

		err = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					     dyn_size, atom_size,
					     pcpu_cpu_distance,
					     pcpu_fc_alloc, pcpu_fc_free);
		if (err < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], err);
	}

	/* page allocator: explicitly chosen, or embedding failed */
	if (err < 0)
		err = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    pcpu_fc_alloc, pcpu_fc_free,
					    pcpup_populate_pte);
	if (err < 0)
		panic("cannot initialize percpu area (err=%d)", err);

	/* percpu areas are live from here on */
	base_delta = (unsigned long)pcpu_base_addr -
		     (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu) {
		const unsigned long cpu_off = base_delta + pcpu_unit_offsets[cpu];

		/* the offset must be in place before any per_cpu() access */
		per_cpu_offset(cpu) = cpu_off;
		per_cpu(this_cpu_off, cpu) = cpu_off;
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/*
		 * Move the data used by early init routines out of the
		 * initial arrays and into the per cpu areas.  Afterwards
		 * the arrays are expendable and the *_early_ptr's are
		 * zeroed to signal that the static arrays are gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
		per_cpu(x86_cpu_to_acpiid, cpu) =
			early_per_cpu_map(x86_cpu_to_acpiid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/*
		 * Make sure the boot cpu's numa_node is right even when it
		 * lives on a node without installed memory.  cpu_up() also
		 * calls cpu_to_node() for APs when MEMORY_HOTPLUG is
		 * defined, before per_cpu(numa_node) is set up later by
		 * c_init (intel_init/amd_init), so set it for the boot cpu
		 * and all APs here.
		 */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/*
		 * Until now the boot CPU has been running on the .init.data
		 * area; reload whatever state changed for it.
		 */
		if (cpu == 0)
			switch_to_new_gdt(cpu);
	}

	/* mark the early static arrays as about to disappear */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
	early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* node -> cpumask map */
	setup_node_to_cpumask_map();

	/* cpu initialized, callin and callout masks */
	setup_cpu_local_masks();

#ifdef CONFIG_X86_32
	/*
	 * Sync the kernel address range back once more.  setup_arch()
	 * already did this, but the percpu data has to be visible to the
	 * smpboot asm as well.  Relying on vmalloc_fault() to pick up the
	 * percpu mappings is not an option because exception dispatch
	 * itself needs percpu data.
	 */
	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
			KERNEL_PGD_PTRS);

	/*
	 * Sync the low identity map back too; the 32-bit EFI stub, for
	 * example, uses it.
	 */
	clone_pgd_range(initial_page_table,
			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
#endif
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 233 | 51.78% | 17 | 44.74%
Brian Gerst | 125 | 27.78% | 8 | 21.05%
Andrew Lutomirski | 36 | 8.00% | 2 | 5.26%
Vitaly Kuznetsov | 21 | 4.67% | 1 | 2.63%
Yinghai Lu | 10 | 2.22% | 1 | 2.63%
Mike Travis | 8 | 1.78% | 2 | 5.26%
Glauber de Oliveira Costa | 7 | 1.56% | 1 | 2.63%
James Bottomley | 5 | 1.11% | 1 | 2.63%
Linus Torvalds | 1 | 0.22% | 1 | 2.63%
Denys Vlasenko | 1 | 0.22% | 1 | 2.63%
Joe Perches | 1 | 0.22% | 1 | 2.63%
Robert Richter | 1 | 0.22% | 1 | 2.63%
Alexey Dobriyan | 1 | 0.22% | 1 | 2.63%
Total | 450 | 100.00% | 38 | 100.00%


Overall Contributors

Person | Tokens | Prop | Commits | CommitProp
Tejun Heo | 635 | 64.27% | 22 | 36.67%
Brian Gerst | 170 | 17.21% | 10 | 16.67%
Glauber de Oliveira Costa | 48 | 4.86% | 1 | 1.67%
Andrew Lutomirski | 36 | 3.64% | 2 | 3.33%
Vitaly Kuznetsov | 21 | 2.13% | 1 | 1.67%
Yinghai Lu | 20 | 2.02% | 3 | 5.00%
Mike Travis | 10 | 1.01% | 3 | 5.00%
Joe Perches | 9 | 0.91% | 1 | 1.67%
Thomas Gleixner | 6 | 0.61% | 1 | 1.67%
Bernhard Walle | 6 | 0.61% | 1 | 1.67%
James Bottomley | 5 | 0.51% | 1 | 1.67%
Jaswinder Singh Rajput | 5 | 0.51% | 2 | 3.33%
Alexey Y. Starikovskiy | 4 | 0.40% | 2 | 3.33%
Paul Gortmaker | 4 | 0.40% | 1 | 1.67%
Jan Beulich | 1 | 0.10% | 1 | 1.67%
Vlad Zolotarov | 1 | 0.10% | 1 | 1.67%
Greg Kroah-Hartman | 1 | 0.10% | 1 | 1.67%
Kees Cook | 1 | 0.10% | 1 | 1.67%
Robert Richter | 1 | 0.10% | 1 | 1.67%
Thomas Garnier | 1 | 0.10% | 1 | 1.67%
Linus Torvalds | 1 | 0.10% | 1 | 1.67%
Denys Vlasenko | 1 | 0.10% | 1 | 1.67%
Alexey Dobriyan | 1 | 0.10% | 1 | 1.67%
Total | 988 | 100.00% | 60 | 100.00%
Directory: arch/x86/kernel
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.