Contributors: 17
Author Tokens Token Proportion Commits Commit Proportion
Heiko Carstens 2099 81.64% 25 37.31%
Martin Schwidefsky 291 11.32% 17 25.37%
Jan Glauber 122 4.75% 3 4.48%
Sven Schnelle 13 0.51% 1 1.49%
Linus Torvalds (pre-git) 9 0.35% 6 8.96%
Alexander Gordeev 8 0.31% 1 1.49%
Gerald Schaefer 6 0.23% 2 2.99%
Claudio Imbrenda 6 0.23% 1 1.49%
Laura Abbott 3 0.12% 1 1.49%
Arnd Bergmann 3 0.12% 1 1.49%
Rick Edgecombe 2 0.08% 1 1.49%
Dominik Dingel 2 0.08% 1 1.49%
Mike Rapoport 2 0.08% 2 2.99%
Peter Xu 2 0.08% 2 2.99%
JoonSoo Kim 1 0.04% 1 1.49%
Greg Kroah-Hartman 1 0.04% 1 1.49%
Johannes Weiner 1 0.04% 1 1.49%
Total 2571 67


// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2011
 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
 */
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/set_memory.h>

static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
		     : [addr] "+a" (addr) : [skey] "d" (skey));
	return addr;
}

void __storage_key_init_range(unsigned long start, unsigned long end)
{
	unsigned long boundary, size;

	while (start < end) {
		if (MACHINE_HAS_EDAT1) {
			/* set storage keys for a 1MB frame */
			size = 1UL << 20;
			boundary = (start + size) & ~(size - 1);
			if (boundary <= end) {
				do {
					start = sske_frame(start, PAGE_DEFAULT_KEY);
				} while (start < boundary);
				continue;
			}
		}
		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
		start += PAGE_SIZE;
	}
}

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

void arch_report_meminfo(struct seq_file *m)
{
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
	seq_printf(m, "DirectMap1M:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
	seq_printf(m, "DirectMap2G:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
}
#endif /* CONFIG_PROC_FS */

static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
		    unsigned long dtt)
{
	unsigned long *table, mask;

	mask = 0;
	if (MACHINE_HAS_EDAT2) {
		switch (dtt) {
		case CRDTE_DTT_REGION3:
			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
			break;
		case CRDTE_DTT_SEGMENT:
			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
			break;
		case CRDTE_DTT_PAGE:
			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
			break;
		}
		table = (unsigned long *)((unsigned long)old & mask);
		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val);
	} else if (MACHINE_HAS_IDTE) {
		cspg(old, *old, new);
	} else {
		csp((unsigned int *)old + 1, *old, new);
	}
}

static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	pte_t *ptep, new;

	if (flags == SET_MEMORY_4K)
		return 0;
	ptep = pte_offset_kernel(pmdp, addr);
	do {
		new = *ptep;
		if (pte_none(new))
			return -EINVAL;
		if (flags & SET_MEMORY_RO)
			new = pte_wrprotect(new);
		else if (flags & SET_MEMORY_RW)
			new = pte_mkwrite_novma(pte_mkdirty(new));
		if (flags & SET_MEMORY_NX)
			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		else if (flags & SET_MEMORY_X)
			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		if (flags & SET_MEMORY_INV) {
			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
		} else if (flags & SET_MEMORY_DEF) {
			new = __pte(pte_val(new) & PAGE_MASK);
			new = set_pte_bit(new, PAGE_KERNEL);
			if (!MACHINE_HAS_NX)
				new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		}
		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
		ptep++;
		addr += PAGE_SIZE;
		cond_resched();
	} while (addr < end);
	return 0;
}

static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
	unsigned long pte_addr, prot;
	pte_t *pt_dir, *ptep;
	pmd_t new;
	int i, ro, nx;

	pt_dir = vmem_pte_alloc();
	if (!pt_dir)
		return -ENOMEM;
	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
	if (!nx)
		prot &= ~_PAGE_NOEXEC;
	ptep = pt_dir;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte(ptep, __pte(pte_addr | prot));
		pte_addr += PAGE_SIZE;
		ptep++;
	}
	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
	update_page_count(PG_DIRECT_MAP_1M, -1);
	return 0;
}

static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
			    unsigned long flags)
{
	pmd_t new = *pmdp;

	if (flags & SET_MEMORY_RO)
		new = pmd_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pmd_mkwrite_novma(pmd_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pmd(pmd_val(new) & PMD_MASK);
		new = set_pmd_bit(new, SEGMENT_KERNEL);
		if (!MACHINE_HAS_NX)
			new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	}
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}

static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pmd_t *pmdp;
	int rc = 0;

	pmdp = pmd_offset(pudp, addr);
	do {
		if (pmd_none(*pmdp))
			return -EINVAL;
		next = pmd_addr_end(addr, end);
		if (pmd_leaf(*pmdp)) {
			need_split  = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PMD_MASK);
			need_split |= !!(addr + PMD_SIZE > next);
			if (need_split) {
				rc = split_pmd_page(pmdp, addr);
				if (rc)
					return rc;
				continue;
			}
			modify_pmd_page(pmdp, addr, flags);
		} else {
			rc = walk_pte_level(pmdp, addr, next, flags);
			if (rc)
				return rc;
		}
		pmdp++;
		addr = next;
		cond_resched();
	} while (addr < end);
	return rc;
}

static int split_pud_page(pud_t *pudp, unsigned long addr)
{
	unsigned long pmd_addr, prot;
	pmd_t *pm_dir, *pmdp;
	pud_t new;
	int i, ro, nx;

	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
	if (!pm_dir)
		return -ENOMEM;
	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
	if (!nx)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;
	pmdp = pm_dir;
	for (i = 0; i < PTRS_PER_PMD; i++) {
		set_pmd(pmdp, __pmd(pmd_addr | prot));
		pmd_addr += PMD_SIZE;
		pmdp++;
	}
	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
	update_page_count(PG_DIRECT_MAP_2G, -1);
	return 0;
}

static void modify_pud_page(pud_t *pudp, unsigned long addr,
			    unsigned long flags)
{
	pud_t new = *pudp;

	if (flags & SET_MEMORY_RO)
		new = pud_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pud_mkwrite(pud_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pud(pud_val(new) & PUD_MASK);
		new = set_pud_bit(new, REGION3_KERNEL);
		if (!MACHINE_HAS_NX)
			new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	}
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}

static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pud_t *pudp;
	int rc = 0;

	pudp = pud_offset(p4d, addr);
	do {
		if (pud_none(*pudp))
			return -EINVAL;
		next = pud_addr_end(addr, end);
		if (pud_leaf(*pudp)) {
			need_split  = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PUD_MASK);
			need_split |= !!(addr + PUD_SIZE > next);
			if (need_split) {
				rc = split_pud_page(pudp, addr);
				if (rc)
					break;
				continue;
			}
			modify_pud_page(pudp, addr, flags);
		} else {
			rc = walk_pmd_level(pudp, addr, next, flags);
		}
		pudp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	p4d_t *p4dp;
	int rc = 0;

	p4dp = p4d_offset(pgd, addr);
	do {
		if (p4d_none(*p4dp))
			return -EINVAL;
		next = p4d_addr_end(addr, end);
		rc = walk_pud_level(p4dp, addr, next, flags);
		p4dp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

DEFINE_MUTEX(cpa_mutex);

static int change_page_attr(unsigned long addr, unsigned long end,
			    unsigned long flags)
{
	unsigned long next;
	int rc = -EINVAL;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	do {
		if (pgd_none(*pgdp))
			break;
		next = pgd_addr_end(addr, end);
		rc = walk_p4d_level(pgdp, addr, next, flags);
		if (rc)
			break;
		cond_resched();
	} while (pgdp++, addr = next, addr < end && !rc);
	return rc;
}

static int change_page_attr_alias(unsigned long addr, unsigned long end,
				  unsigned long flags)
{
	unsigned long alias, offset, va_start, va_end;
	struct vm_struct *area;
	int rc = 0;

	/*
	 * Changes to read-only permissions on kernel VA mappings are also
	 * applied to the kernel direct mapping. Execute permissions are
	 * intentionally not transferred to keep all allocated pages within
	 * the direct mapping non-executable.
	 */
	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
	if (!flags)
		return 0;
	area = NULL;
	while (addr < end) {
		if (!area)
			area = find_vm_area((void *)addr);
		if (!area || !(area->flags & VM_ALLOC))
			return 0;
		va_start = (unsigned long)area->addr;
		va_end = va_start + area->nr_pages * PAGE_SIZE;
		offset = (addr - va_start) >> PAGE_SHIFT;
		alias = (unsigned long)page_address(area->pages[offset]);
		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
		if (rc)
			break;
		addr += PAGE_SIZE;
		if (addr >= va_end)
			area = NULL;
	}
	return rc;
}

int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
{
	unsigned long end;
	int rc;

	if (!MACHINE_HAS_NX)
		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
	if (!flags)
		return 0;
	if (!numpages)
		return 0;
	addr &= PAGE_MASK;
	end = addr + numpages * PAGE_SIZE;
	mutex_lock(&cpa_mutex);
	rc = change_page_attr(addr, end, flags);
	if (rc)
		goto out;
	rc = change_page_attr_alias(addr, end, flags);
out:
	mutex_unlock(&cpa_mutex);
	return rc;
}

int set_direct_map_invalid_noflush(struct page *page)
{
	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
}

int set_direct_map_default_noflush(struct page *page)
{
	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
}

#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)

static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
	int i;

	if (test_facility(13)) {
		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
		return;
	}
	for (i = 0; i < nr; i++) {
		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
		address += PAGE_SIZE;
		pte++;
	}
}

void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long address;
	pte_t *ptep, pte;
	int nr, i, j;

	for (i = 0; i < numpages;) {
		address = (unsigned long)page_to_virt(page + i);
		ptep = virt_to_kpte(address);
		nr = (unsigned long)ptep >> ilog2(sizeof(long));
		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
		nr = min(numpages - i, nr);
		if (enable) {
			for (j = 0; j < nr; j++) {
				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
				set_pte(ptep, pte);
				address += PAGE_SIZE;
				ptep++;
			}
		} else {
			ipte_range(ptep, address, nr);
		}
		i += nr;
	}
}

#endif /* CONFIG_DEBUG_PAGEALLOC */