Contributors: 1
Author Tokens Token Proportion Commits Commit Proportion
Jason Gunthorpe 383 100.00% 12 100.00%
Total 383 12


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_IOMMU_H
#define __GENERIC_PT_IOMMU_H

#include <linux/generic_pt/common.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>

struct iommu_iotlb_gather;
struct pt_iommu_ops;
struct pt_iommu_driver_ops;
struct iommu_dirty_bitmap;

/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
 * the generic map/unmap interface.
 *
 * This interface uses a caller provided locking approach. The caller must have
 * a VA range lock concept that prevents concurrent threads from calling ops on
 * the same VA. Generally the range lock must be at least as large as a single
 * map call.
 */

/**
 * struct pt_iommu - Base structure for IOMMU page tables
 *
 * The format-specific struct will include this as the first member.
 */
struct pt_iommu {
	/**
	 * @domain: The core IOMMU domain. The driver should use a union to
	 * overlay this memory with its previously existing domain struct to
	 * create an alias.
	 */
	struct iommu_domain domain;

	/**
	 * @ops: Function pointers to access the API
	 */
	const struct pt_iommu_ops *ops;

	/**
	 * @driver_ops: Function pointers provided by the HW driver to help
	 * manage HW details like caches.
	 */
	const struct pt_iommu_driver_ops *driver_ops;

	/**
	 * @nid: Node ID to use for table memory allocations. The IOMMU driver
	 * may want to set the NID to the device's NID, if there are multiple
	 * table walkers.
	 */
	int nid;

	/**
	 * @iommu_device: Device pointer used for any DMA cache flushing when
	 * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
	 * page table which must have dma ops that perform cache flushing.
	 */
	struct device *iommu_device;
};

/**
 * struct pt_iommu_info - Details about the IOMMU page table
 *
 * Returned from pt_iommu_ops->get_info()
 */
struct pt_iommu_info {
	/**
	 * @pgsize_bitmap: A bitmask where each set bit indicates
	 * a page size that can be natively stored in the page table.
	 */
	u64 pgsize_bitmap;
};

struct pt_iommu_ops {
	/**
	 * @set_dirty: Make the iova write dirty
	 * @iommu_table: Table to manipulate
	 * @iova: IO virtual address to start
	 *
	 * This is only used by iommufd testing. It makes the iova dirty so that
	 * read_and_clear_dirty() will see it as dirty. Unlike all the other ops
	 * this one is safe to call without holding any locking. It may return
	 * -EAGAIN if there is a race.
	 */
	int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);

	/**
	 * @get_info: Return the pt_iommu_info structure
	 * @iommu_table: Table to query
	 *
	 * Return some basic static information about the page table.
	 */
	void (*get_info)(struct pt_iommu *iommu_table,
			 struct pt_iommu_info *info);

	/**
	 * @deinit: Undo a format specific init operation
	 * @iommu_table: Table to destroy
	 *
	 * Release all of the memory. The caller must have already removed the
	 * table from all HW access and all caches.
	 */
	void (*deinit)(struct pt_iommu *iommu_table);
};

/**
 * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
 *
 * The IOMMU driver should implement these using container_of(iommu_table) to
 * get to it's iommu_domain derived structure. All ops can be called in atomic
 * contexts as they are buried under DMA API calls.
 */
struct pt_iommu_driver_ops {
	/**
	 * @change_top: Update the top of table pointer
	 * @iommu_table: Table to operate on
	 * @top_paddr: New CPU physical address of the top pointer
	 * @top_level: IOMMU PT level of the new top
	 *
	 * Called under the get_top_lock() spinlock. The driver must update all
	 * HW references to this domain with a new top address and
	 * configuration. On return mappings placed in the new top must be
	 * reachable by the HW.
	 *
	 * top_level encodes the level in IOMMU PT format, level 0 is the
	 * smallest page size increasing from there. This has to be translated
	 * to any HW specific format. During this call the new top will not be
	 * visible to any other API.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
	 * enabled.
	 */
	void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
			   unsigned int top_level);

	/**
	 * @get_top_lock: lock to hold when changing the table top
	 * @iommu_table: Table to operate on
	 *
	 * Return a lock to hold when changing the table top page table from
	 * being stored in HW. The lock will be held prior to calling
	 * change_top() and released once the top is fully visible.
	 *
	 * Typically this would be a lock that protects the iommu_domain's
	 * attachment list.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
	 * enabled.
	 */
	spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
};

static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
{
	/*
	 * It is safe to call pt_iommu_deinit() before an init, or if init
	 * fails. The ops pointer will only become non-NULL if deinit needs to be
	 * run.
	 */
	if (iommu_table->ops)
		iommu_table->ops->deinit(iommu_table);
}

/**
 * struct pt_iommu_cfg - Common configuration values for all formats
 */
struct pt_iommu_cfg {
	/**
	 * @features: Features required. Only these features will be turned on.
	 * The feature list should reflect what the IOMMU HW is capable of.
	 */
	unsigned int features;
	/**
	 * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
	 * imply the top level of the table.
	 */
	u8 hw_max_vasz_lg2;
	/**
	 * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
	 * might select a lower maximum OA.
	 */
	u8 hw_max_oasz_lg2;
};

/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt)                                                  \
	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
						  dma_addr_t iova);            \
	int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,            \
				       unsigned long iova, phys_addr_t paddr,  \
				       size_t pgsize, size_t pgcount,          \
				       int prot, gfp_t gfp, size_t *mapped);   \
	size_t pt_iommu_##fmt##_unmap_pages(                                   \
		struct iommu_domain *domain, unsigned long iova,               \
		size_t pgsize, size_t pgcount,                                 \
		struct iommu_iotlb_gather *iotlb_gather);                      \
	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
		struct iommu_domain *domain, unsigned long iova, size_t size,  \
		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,                \
				  const struct pt_iommu_##fmt##_cfg *cfg,      \
				  gfp_t gfp);                                  \
	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,            \
				      struct pt_iommu_##fmt##_hw_info *info)
#define IOMMU_FORMAT(fmt, member)       \
	struct pt_iommu_##fmt {         \
		struct pt_iommu iommu;  \
		struct pt_##fmt member; \
	};                              \
	IOMMU_PROTOTYPES(fmt)

/*
 * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
 * iommu_pt
 */
#define IOMMU_PT_DOMAIN_OPS(fmt)                        \
	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
	.map_pages = &pt_iommu_##fmt##_map_pages,       \
	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt) \
	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty

/*
 * The driver should setup its domain struct like
 *	union {
 *		struct iommu_domain domain;
 *		struct pt_iommu_xxx xx;
 *	};
 * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
 *
 * Which creates an alias between driver_domain.domain and
 * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
 * driver_domain.domain users.
 */
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
	static_assert(offsetof(s, pt_iommu_memb.domain) ==   \
		      offsetof(s, domain_memb))

struct pt_iommu_amdv1_cfg {
	struct pt_iommu_cfg common;
	unsigned int starting_level;
};

struct pt_iommu_amdv1_hw_info {
	u64 host_pt_root;
	u8 mode;
};

IOMMU_FORMAT(amdv1, amdpt);

/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);

struct pt_iommu_vtdss_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_vtdss_hw_info {
	u64 ssptptr;
	u8 aw;
};

IOMMU_FORMAT(vtdss, vtdss_pt);

struct pt_iommu_x86_64_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_x86_64_hw_info {
	u64 gcr3_pt;
	u8 levels;
};

IOMMU_FORMAT(x86_64, x86_64_pt);

#undef IOMMU_PROTOTYPES
#undef IOMMU_FORMAT
#endif