Contributors: 11
Author Tokens Token Proportion Commits Commit Proportion
Duc Dang 871 59.29% 1 4.35%
Marc Zyngier 498 33.90% 12 52.17%
Tanmay Inamdar 75 5.11% 1 4.35%
Sebastian Andrzej Siewior 6 0.41% 1 4.35%
Fabio Estevam 6 0.41% 1 4.35%
Björn Helgaas 5 0.34% 2 8.70%
Li Yang 4 0.27% 1 4.35%
Thomas Gleixner 1 0.07% 1 4.35%
Uwe Kleine-König 1 0.07% 1 4.35%
Christophe Jaillet 1 0.07% 1 4.35%
Sergio Paracuellos 1 0.07% 1 4.35%
Total 1469 23


// SPDX-License-Identifier: GPL-2.0+
/*
 * APM X-Gene MSI Driver
 *
 * Copyright (c) 2014, Applied Micro Circuits Corporation
 * Author: Tanmay Inamdar <tinamdar@apm.com>
 *	   Duc Dang <dhdang@apm.com>
 */
#include <linux/bitfield.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqchip/irq-msi-lib.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/of_pci.h>

/* Offsets of the first MSInIRx and the first MSIINTn register from the base */
#define MSI_IR0			0x000000
#define MSI_INT0		0x800000
/* Termination registers per frame, MSI vectors per register, number of frames */
#define IDX_PER_GROUP		8
#define IRQS_PER_IDX		16
#define NR_HW_IRQS		16
/* Total number of MSI bits provided by all frames */
#define NR_MSI_BITS		(IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS)
/* Allocatable vectors; not a compile-time constant, it calls num_possible_cpus() */
#define NR_MSI_VEC		(NR_MSI_BITS / num_possible_cpus())

/* MSInIRx offset fields: frame number in [22:19], register index in [18:16] */
#define MSI_GROUP_MASK		GENMASK(22, 19)
#define MSI_INDEX_MASK		GENMASK(18, 16)
/* MSIINTn offset field (added to MSI_INT0): frame number in [19:16] */
#define MSI_INTR_MASK		GENMASK(19, 16)

/* hwirq fields: register index within a frame in [6:4], MSI data in [3:0] */
#define MSInRx_HWIRQ_MASK	GENMASK(6, 4)
#define DATA_HWIRQ_MASK		GENMASK(3, 0)

/* Driver state for the X-Gene MSI controller */
struct xgene_msi {
	/* IRQ domain the MSIs are allocated from, see xgene_allocate_domains() */
	struct irq_domain	*inner_domain;
	/* Start of the register resource; base of the MSI target addresses */
	u64			msi_addr;
	/* Mapping of the register resource, used for the CPU-side reads */
	void __iomem		*msi_regs;
	/* Allocation bitmap of NR_MSI_VEC bits, one per hwirq */
	unsigned long		*bitmap;
	/* Taken around every update of @bitmap */
	struct mutex		bitmap_lock;
	/* Linux IRQ number of each frame's GIC interrupt; 0 until obtained */
	unsigned int		gic_irq[NR_HW_IRQS];
};

/* The single controller instance; allocated and assigned in xgene_msi_probe() */
static struct xgene_msi *xgene_msi_ctrl;

/*
 * X-Gene v1 has 16 frames of MSI termination registers MSInIRx, where n is
 * frame number (0..15), x is index of registers in each frame (0..7).  Each
 * 32b register is at the beginning of a 64kB region, each frame occupying
 * 512kB (and the whole thing 8MB of PA space).
 *
 * Each register supports 16 MSI vectors (0..15) to generate interrupts. A
 * write to the MSInIRx from the PCI side generates an interrupt. A read
 * from the MSInIRx on the CPU side returns a bitmap of the pending MSIs in
 * the lower 16 bits. A side effect of this read is that all pending
 * interrupts are acknowledged and cleared.
 *
 * Additionally, each MSI termination frame has 1 MSIINTn register (n is
 * 0..15) to indicate the MSI pending status caused by any of its 8
 * termination registers, reported as a bitmap in the lower 8 bits. Each 32b
 * register is at the beginning of a 64kB region (and overall occupying an
 * extra 1MB).
 *
 * There is one GIC IRQ assigned for each MSI termination frame, 16 in
 * total.
 *
 * The register layout is as follows:
 * MSI0IR0			base_addr
 * MSI0IR1			base_addr +  0x10000
 * ...				...
 * MSI0IR6			base_addr +  0x60000
 * MSI0IR7			base_addr +  0x70000
 * MSI1IR0			base_addr +  0x80000
 * MSI1IR1			base_addr +  0x90000
 * ...				...
 * MSI1IR7			base_addr +  0xF0000
 * MSI2IR0			base_addr + 0x100000
 * ...				...
 * MSIFIR0			base_addr + 0x780000
 * MSIFIR1			base_addr + 0x790000
 * ...				...
 * MSIFIR7			base_addr + 0x7F0000
 * MSIINT0			base_addr + 0x800000
 * MSIINT1			base_addr + 0x810000
 * ...				...
 * MSIINTF			base_addr + 0x8F0000
 */

/*
 * Read termination register MSInIRx, with n = @msi_grp and x = @msir_idx.
 * The register offset is the frame number and register index packed into
 * their address fields, relative to MSI_IR0.
 */
static u32 xgene_msi_ir_read(struct xgene_msi *msi, u32 msi_grp, u32 msir_idx)
{
	unsigned long offset = MSI_IR0;

	offset += FIELD_PREP(MSI_GROUP_MASK, msi_grp) |
		  FIELD_PREP(MSI_INDEX_MASK, msir_idx);

	return readl_relaxed(msi->msi_regs + offset);
}

/*
 * Read the summary register MSIINTn of frame @msi_grp, located at MSI_INT0
 * plus the frame number packed into the MSI_INTR_MASK address field.
 */
static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp)
{
	unsigned long offset = MSI_INT0;

	offset += FIELD_PREP(MSI_INTR_MASK, msi_grp);

	return readl_relaxed(msi->msi_regs + offset);
}

/*
 * In order to allow an MSI to be moved from one CPU to another without
 * having to repaint both the address and the data (which cannot be done
 * atomically), we statically partition the MSI frames between CPUs. Given
 * that XGene-1 has 8 CPUs, each CPU gets two frames assigned to it.
 *
 * We adopt the convention that when an MSI is moved, it is configured to
 * target the same register number in the congruent frame assigned to the
 * new target CPU. This reserves a given MSI across all CPUs, and reduces
 * the MSI capacity from 2048 to 256.
 *
 * Effectively, this amounts to:
 * - hwirq[7]::cpu[2:0] is the target frame number (n in MSInIRx)
 * - hwirq[6:4] is the register index in any given frame (x in MSInIRx)
 * - hwirq[3:0] is the MSI data
 */
static irq_hw_number_t compute_hwirq(u8 frame, u8 index, u8 data)
{
	irq_hw_number_t hwirq;

	/* Only bit 3 of the frame number is kept; it lands in hwirq bit 7 */
	hwirq = FIELD_PREP(BIT(7), FIELD_GET(BIT(3), frame));
	/* Register index within the frame */
	hwirq |= FIELD_PREP(MSInRx_HWIRQ_MASK, index);
	/* MSI data, i.e. the vector number within the register */
	hwirq |= FIELD_PREP(DATA_HWIRQ_MASK, data);

	return hwirq;
}

static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
	u64 target_addr;
	u32 frame, msir;
	int cpu;

	cpu	= cpumask_first(irq_data_get_effective_affinity_mask(data));
	msir	= FIELD_GET(MSInRx_HWIRQ_MASK, data->hwirq);
	frame	= FIELD_PREP(BIT(3), FIELD_GET(BIT(7), data->hwirq)) | cpu;

	target_addr = msi->msi_addr;
	target_addr += (FIELD_PREP(MSI_GROUP_MASK, frame) |
			FIELD_PREP(MSI_INTR_MASK, msir));

	msg->address_hi = upper_32_bits(target_addr);
	msg->address_lo = lower_32_bits(target_addr);
	msg->data = FIELD_GET(DATA_HWIRQ_MASK, data->hwirq);
}

/*
 * Retarget an MSI: record the first CPU of @mask as the effective affinity.
 * @force is not used.
 */
static int xgene_msi_set_affinity(struct irq_data *irqdata,
				  const struct cpumask *mask, bool force)
{
	irq_data_update_effective_affinity(irqdata,
					   cpumask_of(cpumask_first(mask)));

	/* Force the core code to regenerate the message */
	return IRQ_SET_MASK_OK;
}

/* irq_chip installed on every hwirq handed out by xgene_irq_domain_alloc() */
static struct irq_chip xgene_msi_bottom_irq_chip = {
	.name			= "MSI",
	.irq_set_affinity       = xgene_msi_set_affinity,
	.irq_compose_msi_msg	= xgene_compose_msi_msg,
};

/*
 * Allocate one hwirq for @virq from the controller's bitmap.
 *
 * Claims the first free bit below NR_MSI_VEC under bitmap_lock and binds it
 * to @virq. @nr_irqs and @args are not used.
 *
 * Returns 0 on success, -ENOSPC when no free bit is left.
 */
static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *args)
{
	struct xgene_msi *msi = domain->host_data;
	irq_hw_number_t hwirq;
	int ret = 0;

	mutex_lock(&msi->bitmap_lock);
	hwirq = find_first_zero_bit(msi->bitmap, NR_MSI_VEC);
	if (hwirq >= NR_MSI_VEC)
		ret = -ENOSPC;
	else
		set_bit(hwirq, msi->bitmap);
	mutex_unlock(&msi->bitmap_lock);

	if (ret)
		return ret;

	irq_domain_set_info(domain, virq, hwirq, &xgene_msi_bottom_irq_chip,
			    msi, handle_simple_irq, NULL, NULL);
	irqd_set_resend_when_in_progress(irq_get_irq_data(virq));

	return 0;
}

static void xgene_irq_domain_free(struct irq_domain *domain,
				  unsigned int virq, unsigned int nr_irqs)
{
	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);

	mutex_lock(&msi->bitmap_lock);

	clear_bit(d->hwirq, msi->bitmap);

	mutex_unlock(&msi->bitmap_lock);

	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}

/* hwirq allocation and release callbacks of the inner domain */
static const struct irq_domain_ops xgene_msi_domain_ops = {
	.alloc  = xgene_irq_domain_alloc,
	.free   = xgene_irq_domain_free,
};

/*
 * MSI parent description passed to msi_create_parent_irq_domain(): the
 * generic MSI flags plus MSI-X are supported, the default domain and chip
 * ops are required, and the domain is selected for the PCI MSI bus token.
 */
static const struct msi_parent_ops xgene_msi_parent_ops = {
	.supported_flags	= (MSI_GENERIC_FLAGS_MASK	|
				   MSI_FLAG_PCI_MSIX),
	.required_flags		= (MSI_FLAG_USE_DEF_DOM_OPS	|
				   MSI_FLAG_USE_DEF_CHIP_OPS),
	.bus_select_token	= DOMAIN_BUS_PCI_MSI,
	.init_dev_msi_info	= msi_lib_init_dev_msi_info,
};

/*
 * Create the MSI parent IRQ domain for @node, sized to NR_MSI_VEC hwirqs,
 * and store it in @msi->inner_domain.
 *
 * Returns 0 on success, -ENOMEM if the domain could not be created.
 */
static int xgene_allocate_domains(struct device_node *node,
				  struct xgene_msi *msi)
{
	struct irq_domain_info info = {
		.fwnode		= of_fwnode_handle(node),
		.ops		= &xgene_msi_domain_ops,
		.size		= NR_MSI_VEC,
		.host_data	= msi,
	};
	struct irq_domain *domain;

	domain = msi_create_parent_irq_domain(&info, &xgene_msi_parent_ops);
	msi->inner_domain = domain;
	if (!domain)
		return -ENOMEM;

	return 0;
}

/*
 * Set up the hwirq allocator of the global controller: a zeroed,
 * device-managed bitmap of NR_MSI_VEC bits and the mutex protecting it.
 *
 * Returns 0 on success, -ENOMEM if the bitmap cannot be allocated.
 */
static int xgene_msi_init_allocator(struct device *dev)
{
	unsigned long *bitmap;

	bitmap = devm_bitmap_zalloc(dev, NR_MSI_VEC, GFP_KERNEL);
	xgene_msi_ctrl->bitmap = bitmap;
	if (!bitmap)
		return -ENOMEM;

	mutex_init(&xgene_msi_ctrl->bitmap_lock);

	return 0;
}

/*
 * Chained handler for one frame's GIC interrupt.
 *
 * Reads the frame's MSIINTn summary, then each flagged MSInIRx register,
 * and dispatches every pending vector into the inner domain.
 */
static void xgene_msi_isr(struct irq_desc *desc)
{
	struct xgene_msi *msi = xgene_msi_ctrl;
	unsigned int *gic_slot = irq_desc_get_handler_data(desc);
	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
	unsigned long frame_status;
	u32 frame;
	int reg;

	chained_irq_enter(parent_chip, desc);

	/* The handler data points into gic_irq[]; its index is the frame */
	frame = gic_slot - msi->gic_irq;
	frame_status = xgene_msi_int_read(msi, frame);

	for_each_set_bit(reg, &frame_status, IDX_PER_GROUP) {
		/* This read also acks and clears what it reports */
		unsigned long pending = xgene_msi_ir_read(msi, frame, reg);
		int vec;

		for_each_set_bit(vec, &pending, IRQS_PER_IDX) {
			int ret;

			ret = generic_handle_domain_irq(msi->inner_domain,
							compute_hwirq(frame, reg, vec));
			WARN_ON_ONCE(ret);
		}
	}

	chained_irq_exit(parent_chip, desc);
}

static void xgene_msi_remove(struct platform_device *pdev)
{
	for (int i = 0; i < NR_HW_IRQS; i++) {
		unsigned int irq = xgene_msi_ctrl->gic_irq[i];
		if (!irq)
			continue;
		irq_set_chained_handler_and_data(irq, NULL, NULL);
	}

	if (xgene_msi_ctrl->inner_domain)
		irq_domain_remove(xgene_msi_ctrl->inner_domain);
}

/*
 * Clear stale MSI state and wire up the per-frame GIC interrupts.
 *
 * For each of the NR_HW_IRQS frames: drain the read-to-clear MSInIRx
 * registers, check that MSIINTn reports nothing pending, pin the frame's
 * GIC IRQ to a CPU and install xgene_msi_isr() as its chained handler.
 *
 * Returns 0 on success or a negative errno. Nothing is undone here on
 * failure; the IRQs obtained so far are recorded in gic_irq[] so that
 * xgene_msi_remove() can detach them.
 */
static int xgene_msi_handler_setup(struct platform_device *pdev)
{
	struct xgene_msi *xgene_msi = xgene_msi_ctrl;
	int i;

	for (i = 0; i < NR_HW_IRQS; i++) {
		u32 msi_val;
		int irq, err;

		/*
		 * MSInIRx registers are read-to-clear; before registering
		 * interrupt handlers, read all of them to clear spurious
		 * interrupts that may occur before the driver is probed.
		 */
		for (int msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++)
			xgene_msi_ir_read(xgene_msi, i, msi_idx);

		/* Read MSIINTn to confirm */
		msi_val = xgene_msi_int_read(xgene_msi, i);
		if (msi_val) {
			dev_err(&pdev->dev, "Failed to clear spurious IRQ\n");
			/* Must be negative: probe hands this to the driver core */
			return -EINVAL;
		}

		irq = platform_get_irq(pdev, i);
		if (irq < 0)
			return irq;

		xgene_msi->gic_irq[i] = irq;

		/*
		 * Statically allocate MSI GIC IRQs to each CPU core.
		 * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
		 * to each core.
		 */
		irq_set_status_flags(irq, IRQ_NO_BALANCING);
		err = irq_set_affinity(irq, cpumask_of(i % num_possible_cpus()));
		if (err) {
			dev_err(&pdev->dev,
				"failed to set affinity for GIC IRQ\n");
			return err;
		}

		/* The ISR recovers the frame number from this gic_irq[] slot */
		irq_set_chained_handler_and_data(irq, xgene_msi_isr,
						 &xgene_msi_ctrl->gic_irq[i]);
	}

	return 0;
}

/* Device tree match table: the driver binds to "apm,xgene1-msi" nodes */
static const struct of_device_id xgene_msi_match_table[] = {
	{.compatible = "apm,xgene1-msi"},
	{},
};

static int xgene_msi_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct xgene_msi *xgene_msi;
	int rc;

	xgene_msi_ctrl = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi_ctrl),
				      GFP_KERNEL);
	if (!xgene_msi_ctrl)
		return -ENOMEM;

	xgene_msi = xgene_msi_ctrl;

	xgene_msi->msi_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(xgene_msi->msi_regs)) {
		rc = PTR_ERR(xgene_msi->msi_regs);
		goto error;
	}
	xgene_msi->msi_addr = res->start;

	rc = xgene_msi_init_allocator(&pdev->dev);
	if (rc) {
		dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
		goto error;
	}

	rc = xgene_allocate_domains(dev_of_node(&pdev->dev), xgene_msi);
	if (rc) {
		dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
		goto error;
	}

	rc = xgene_msi_handler_setup(pdev);
	if (rc)
		goto error;

	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");

	return 0;
error:
	xgene_msi_remove(pdev);
	return rc;
}

/* Platform driver glue, registered through builtin_platform_driver() */
static struct platform_driver xgene_msi_driver = {
	.driver = {
		.name = "xgene-msi",
		.of_match_table = xgene_msi_match_table,
	},
	.probe = xgene_msi_probe,
	.remove = xgene_msi_remove,
};
builtin_platform_driver(xgene_msi_driver);