Release 4.14 arch/x86/kernel/setup_percpu.c
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/pfn.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
#include <asm/proto.h>
#include <asm/cpumask.h>
#include <asm/cpu.h>
#include <asm/stackprotector.h>
/*
 * Per-CPU copy of each CPU's own number.  setup_per_cpu_areas() stores
 * the CPU index here for every possible CPU.
 */
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
/*
 * Initial percpu offset used to seed this_cpu_off and __per_cpu_offset[]
 * below: the address of __per_cpu_load on 64-bit, zero on 32-bit.
 */
#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
#endif
/*
 * Per-CPU copy of the CPU's own percpu offset.  Starts out as
 * BOOT_PERCPU_OFFSET and is rewritten for every possible CPU by
 * setup_per_cpu_areas().
 */
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
/*
 * Table of percpu offsets with one slot per CPU up to NR_CPUS.  Every
 * slot is initialised to BOOT_PERCPU_OFFSET via the range designator.
 */
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);
/*
 * On x86_64, symbols referenced from code should be reachable using
 * 32bit relocations.  Reserve space for static percpu variables in
 * modules so that they are always served from the first chunk, which
 * is located at the percpu segment base.  On x86_32, anything can
 * address anywhere, so there is no need to reserve space in the first
 * chunk.
 *
 * PERCPU_FIRST_CHUNK_RESERVE is therefore PERCPU_MODULE_RESERVE on
 * 64-bit and 0 on 32-bit.
 */
#ifdef CONFIG_X86_64
#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE
#else
#define PERCPU_FIRST_CHUNK_RESERVE 0
#endif
#ifdef CONFIG_X86_32
/**
 * pcpu_need_numa - decide whether percpu allocation must be NUMA aware
 *
 * Walks every possible CPU and compares the pg_data_t of the node it
 * is mapped to early on with the pg_data_t seen for the previously
 * visited CPU.  NUMA has to be considered as soon as a CPU sits on an
 * online node whose pg_data_t is non-NULL and differs from a non-NULL
 * pg_data_t recorded for the CPU before it.  When
 * CONFIG_NEED_MULTIPLE_NODES is not set the answer is always false.
 *
 * RETURNS:
 * true if NUMA should be considered; otherwise, false.
 */
static bool __init pcpu_need_numa(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	pg_data_t *prev = NULL;
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		int nid = early_cpu_to_node(cpu);
		pg_data_t *pgdat = NODE_DATA(nid);

		if (node_online(nid) && pgdat && prev && prev != pgdat)
			return true;

		/* Remembered even when the node is offline or has no data. */
		prev = pgdat;
	}
#endif
	return false;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 72 | 100.00% | 1 | 100.00% |
Total | 72 | 100.00% | 1 | 100.00% |
#endif
/**
 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
 * @cpu: cpu to allocate for
 * @size: size allocation in bytes
 * @align: alignment
 *
 * Allocate @size bytes aligned at @align for cpu @cpu, using
 * __pa(MAX_DMA_ADDRESS) as the allocation goal.  With
 * CONFIG_NEED_MULTIPLE_NODES the memory is requested from the node
 * @cpu is mapped to; if that node is offline or has no node data, a
 * node-agnostic allocation is made instead and the fact is logged.
 * Without CONFIG_NEED_MULTIPLE_NODES the node-agnostic allocator is
 * always used.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
					unsigned long align)
{
	const unsigned long dma_goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int nid = early_cpu_to_node(cpu);
	void *area;

	/* Preferred path: the CPU's node is online and has node data. */
	if (node_online(nid) && NODE_DATA(nid)) {
		area = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
						    size, align, dma_goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
			 cpu, size, nid, __pa(area));
		return area;
	}

	/* No usable node for this CPU: take memory from wherever. */
	area = __alloc_bootmem_nopanic(size, align, dma_goal);
	pr_info("cpu %d has no node %d or node-local memory\n",
		cpu, nid);
	pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
		 cpu, size, __pa(area));
	return area;
#else
	return __alloc_bootmem_nopanic(size, align, dma_goal);
#endif
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 146 | 99.32% | 1 | 50.00% |
Joe Perches | 1 | 0.68% | 1 | 50.00% |
Total | 147 | 100.00% | 2 | 100.00% |
/*
 * Helpers for first chunk memory allocation
 */

/*
 * Allocation callback handed to pcpu_embed_first_chunk() and
 * pcpu_page_first_chunk(): forwards @cpu, @size and @align unchanged to
 * pcpu_alloc_bootmem() and returns whatever it returns.
 */
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	void *area = pcpu_alloc_bootmem(cpu, size, align);

	return area;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 28 | 100.00% | 2 | 100.00% |
Total | 28 | 100.00% | 2 | 100.00% |
/*
 * Free callback handed to pcpu_embed_first_chunk() and
 * pcpu_page_first_chunk(): releases @size bytes starting at @ptr.
 * free_bootmem() is given the result of __pa(@ptr) rather than the
 * pointer itself.
 */
static void __init pcpu_fc_free(void *ptr, size_t size)
{
free_bootmem(__pa(ptr), size);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 24 | 100.00% | 1 | 100.00% |
Total | 24 | 100.00% | 1 | 100.00% |
/*
 * Distance callback handed to pcpu_embed_first_chunk().
 *
 * With CONFIG_NEED_MULTIPLE_NODES, returns LOCAL_DISTANCE when @from and
 * @to are mapped to the same node by early_cpu_to_node() and
 * REMOTE_DISTANCE otherwise.  Without it, every pair of CPUs is
 * reported as LOCAL_DISTANCE.
 */
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	return early_cpu_to_node(from) == early_cpu_to_node(to) ?
		LOCAL_DISTANCE : REMOTE_DISTANCE;
#else
	return LOCAL_DISTANCE;
#endif
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 44 | 100.00% | 4 | 100.00% |
Total | 44 | 100.00% | 4 | 100.00% |
/*
 * Page-table populate callback handed to pcpu_page_first_chunk():
 * forwards @addr unchanged to populate_extra_pte().
 */
static void __init pcpup_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 16 | 100.00% | 2 | 100.00% |
Total | 16 | 100.00% | 2 | 100.00% |
/*
 * Install the percpu segment descriptor for @cpu.
 *
 * On 32-bit, builds a descriptor whose base is per_cpu_offset(@cpu) and
 * whose limit field is 0xFFFFF, then writes it into the
 * GDT_ENTRY_PERCPU slot of the GDT returned by get_cpu_gdt_rw(@cpu).
 * On other configurations this is a no-op.
 *
 * NOTE(review): flags 0x8092 presumably describe a present, writable
 * data segment with page granularity -- confirm against GDT_ENTRY_INIT.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
	struct desc_struct percpu_desc =
		GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu), 0xFFFFF);

	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU,
			&percpu_desc, DESCTYPE_S);
#endif
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 25 | 54.35% | 1 | 20.00% |
Glauber de Oliveira Costa | 7 | 15.22% | 1 | 20.00% |
Yinghai Lu | 7 | 15.22% | 1 | 20.00% |
Thomas Gleixner | 6 | 13.04% | 1 | 20.00% |
Thomas Garnier | 1 | 2.17% | 1 | 20.00% |
Total | 46 | 100.00% | 5 | 100.00% |
/*
 * setup_per_cpu_areas - create the percpu first chunk and wire up all CPUs
 *
 * Steps, in order:
 *  1. Pick a first-chunk allocator: the embedding allocator unless
 *     PCPU_FC_PAGE was chosen (on 32-bit, PCPU_FC_AUTO is turned into
 *     PCPU_FC_PAGE when pcpu_need_numa() says so); fall back to the
 *     page allocator if embedding fails; panic if that fails as well.
 *  2. For every possible CPU, record its percpu offset and CPU number,
 *     set up its percpu and stack-canary segments, and copy the early
 *     static map values into the new percpu variables.
 *  3. NULL the early per-cpu pointers, set up the node-to-cpumask map
 *     and the cpu local masks.
 *  4. On 32-bit, copy kernel page-directory entries from
 *     swapper_pg_dir into initial_page_table.
 */
void __init setup_per_cpu_areas(void)
{
unsigned int cpu;
unsigned long delta;
int rc;
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/*
 * Allocate percpu area. Embedding allocator is our favorite;
 * however, on NUMA configurations, it can result in very
 * sparse unit mapping and vmalloc area isn't spacious enough
 * on 32bit. Use page in that case.
 */
#ifdef CONFIG_X86_32
if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
/*
 * Start out "failed" so that the page allocator below runs when the
 * embedding path is skipped because PCPU_FC_PAGE was chosen.
 */
rc = -EINVAL;
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
/*
 * PERCPU_FIRST_CHUNK_RESERVE is PERCPU_MODULE_RESERVE on 64-bit and 0
 * on 32-bit, so the module reserve is counted only once: either as
 * the reserved area (64-bit) or as part of dyn_size (32-bit).
 */
const size_t dyn_size = PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
size_t atom_size;
/*
 * On 64bit, use PMD_SIZE for atom_size so that embedded
 * percpu areas are aligned to PMD. This, in the future,
 * can also allow using PMD mappings in vmalloc area. Use
 * PAGE_SIZE on 32bit as vmalloc space is highly contended
 * and large vmalloc area allocs can easily fail.
 */
#ifdef CONFIG_X86_64
atom_size = PMD_SIZE;
#else
atom_size = PAGE_SIZE;
#endif
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
pr_warning("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
/* Page allocator: used when embedding was skipped or has failed. */
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
pcpu_fc_alloc, pcpu_fc_free,
pcpup_populate_pte);
/* Neither allocator worked; there is nothing to fall back to. */
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
/* alrighty, percpu areas up and running */
/*
 * delta is the difference between pcpu_base_addr and __per_cpu_start;
 * adding a CPU's entry in pcpu_unit_offsets[] yields that CPU's
 * percpu offset.
 */
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/*
 * Copy data used in early init routines from the
 * initial arrays to the per cpu data areas. These
 * arrays then become expendable and the *_early_ptr's
 * are zeroed indicating that the static arrays are
 * gone.
 */
#ifdef CONFIG_X86_LOCAL_APIC
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
per_cpu(x86_cpu_to_acpiid, cpu) =
early_per_cpu_map(x86_cpu_to_acpiid, cpu);
#endif
#ifdef CONFIG_X86_32
per_cpu(x86_cpu_to_logical_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
/* Point irq_stack_ptr IRQ_STACK_SIZE past the start of this CPU's irq_stack. */
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
/*
 * Ensure that the boot cpu numa_node is correct when the boot
 * cpu is on a node that doesn't have memory installed.
 * Also cpu_up() will call cpu_to_node() for APs when
 * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
 * up later with c_init aka intel_init/amd_init.
 * So set them all (boot cpu and all APs).
 */
set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
/*
 * Up to this point, the boot CPU has been using .init.data
 * area. Reload any changed state for the boot CPU.
 */
/* Only CPU 0 takes this branch. */
if (!cpu)
switch_to_new_gdt(cpu);
}
/* indicate the early static arrays will soon be gone */
#ifdef CONFIG_X86_LOCAL_APIC
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL;
#endif
#ifdef CONFIG_X86_32
early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
#ifdef CONFIG_X86_32
/*
 * Sync back kernel address range again. We already did this in
 * setup_arch(), but percpu data also needs to be available in
 * the smpboot asm. We can't reliably pick up percpu mappings
 * using vmalloc_fault(), because exception dispatch needs
 * percpu data.
 */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
/*
 * sync back low identity map too. It is used for example
 * in the 32-bit EFI stub.
 */
/*
 * Note: the source here is again the kernel range of swapper_pg_dir;
 * it is copied to the start of initial_page_table, limited to the
 * smaller of KERNEL_PGD_PTRS and KERNEL_PGD_BOUNDARY entries.
 */
clone_pgd_range(initial_page_table,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
#endif
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 233 | 51.78% | 17 | 44.74% |
Brian Gerst | 125 | 27.78% | 8 | 21.05% |
Andrew Lutomirski | 36 | 8.00% | 2 | 5.26% |
Vitaly Kuznetsov | 21 | 4.67% | 1 | 2.63% |
Yinghai Lu | 10 | 2.22% | 1 | 2.63% |
Mike Travis | 8 | 1.78% | 2 | 5.26% |
Glauber de Oliveira Costa | 7 | 1.56% | 1 | 2.63% |
James Bottomley | 5 | 1.11% | 1 | 2.63% |
Linus Torvalds | 1 | 0.22% | 1 | 2.63% |
Denys Vlasenko | 1 | 0.22% | 1 | 2.63% |
Joe Perches | 1 | 0.22% | 1 | 2.63% |
Robert Richter | 1 | 0.22% | 1 | 2.63% |
Alexey Dobriyan | 1 | 0.22% | 1 | 2.63% |
Total | 450 | 100.00% | 38 | 100.00% |
Overall Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tejun Heo | 635 | 64.27% | 22 | 36.67% |
Brian Gerst | 170 | 17.21% | 10 | 16.67% |
Glauber de Oliveira Costa | 48 | 4.86% | 1 | 1.67% |
Andrew Lutomirski | 36 | 3.64% | 2 | 3.33% |
Vitaly Kuznetsov | 21 | 2.13% | 1 | 1.67% |
Yinghai Lu | 20 | 2.02% | 3 | 5.00% |
Mike Travis | 10 | 1.01% | 3 | 5.00% |
Joe Perches | 9 | 0.91% | 1 | 1.67% |
Thomas Gleixner | 6 | 0.61% | 1 | 1.67% |
Bernhard Walle | 6 | 0.61% | 1 | 1.67% |
James Bottomley | 5 | 0.51% | 1 | 1.67% |
Jaswinder Singh Rajput | 5 | 0.51% | 2 | 3.33% |
Alexey Y. Starikovskiy | 4 | 0.40% | 2 | 3.33% |
Paul Gortmaker | 4 | 0.40% | 1 | 1.67% |
Jan Beulich | 1 | 0.10% | 1 | 1.67% |
Vlad Zolotarov | 1 | 0.10% | 1 | 1.67% |
Greg Kroah-Hartman | 1 | 0.10% | 1 | 1.67% |
Kees Cook | 1 | 0.10% | 1 | 1.67% |
Robert Richter | 1 | 0.10% | 1 | 1.67% |
Thomas Garnier | 1 | 0.10% | 1 | 1.67% |
Linus Torvalds | 1 | 0.10% | 1 | 1.67% |
Denys Vlasenko | 1 | 0.10% | 1 | 1.67% |
Alexey Dobriyan | 1 | 0.10% | 1 | 1.67% |
Total | 988 | 100.00% | 60 | 100.00% |
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.