Contributors: 13
Author Tokens Token Proportion Commits Commit Proportion
Sebastian Ott 1749 68.24% 17 32.69%
Jan Glauber 218 8.51% 3 5.77%
Gerd Bayer 167 6.52% 2 3.85%
Martin Schwidefsky 146 5.70% 5 9.62%
Niklas Schnelle 112 4.37% 6 11.54%
Matthew Rosato 87 3.39% 4 7.69%
Alexander Gordeev 46 1.79% 1 1.92%
Thomas Gleixner 15 0.59% 4 7.69%
Linus Torvalds (pre-git) 8 0.31% 5 9.62%
Peter Zijlstra 6 0.23% 1 1.92%
Heiko Carstens 5 0.20% 2 3.85%
Gerald Schaefer 3 0.12% 1 1.92%
Greg Kroah-Hartman 1 0.04% 1 1.92%
Total 2563 52


// SPDX-License-Identifier: GPL-2.0
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/irq.h>
#include <linux/kernel_stat.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <linux/smp.h>

#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/tpi.h>

/*
 * Interrupt delivery mode shared by all zPCI functions. It is chosen once
 * in zpci_irq_init(): DIRECTED when sclp.has_dirq is set and
 * s390_pci_force_floating is not, FLOATING otherwise.
 */
static enum {FLOATING, DIRECTED} irq_delivery;

/*
 * summary bit vector
 * FLOATING - summary bit per function
 * DIRECTED - summary bit per cpu (only used in fallback path)
 *
 * Created by zpci_floating_irq_init() or zpci_directed_irq_init().
 */
static struct airq_iv *zpci_sbv;

/*
 * interrupt bit vectors
 * FLOATING - interrupt bit vector per function
 * DIRECTED - interrupt bit vector per cpu
 *
 * Array of vector pointers: indexed by the function's summary bit number
 * in FLOATING mode and by CPU number in DIRECTED mode.
 */
static struct airq_iv **zpci_ibv;

/*
 * Modify PCI: Register floating adapter interruptions
 *
 * Builds a format-0 FIB that points at the function's own interrupt bit
 * vector and at its summary bit inside zpci_sbv, then issues the
 * ZPCI_MOD_FC_REG_INT request.
 *
 * Returns 0 on success or -EIO if zpci_mod_fc() reports a non-zero result.
 */
static int zpci_set_airq(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u64 req;
	u8 status;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);

	fib.fmt0.isc = PCI_ISC;
	/* enable summary notifications */
	fib.fmt0.sum = 1;

	/* each zdev has its own interrupt vector, hence offset 0 */
	fib.fmt0.noi = airq_iv_end(zdev->aibv);
	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
	fib.fmt0.aibvo = 0;

	/* address of the 64-bit word holding the summary bit, plus bit offset */
	fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
	fib.fmt0.aisbo = zdev->aisb & 63;

	fib.gd = zdev->gisa;

	if (zpci_mod_fc(req, &fib, &status))
		return -EIO;

	return 0;
}

/*
 * Modify PCI: Unregister floating adapter interruptions
 *
 * Issues the ZPCI_MOD_FC_DEREG_INT request for the function.
 *
 * Returns 0 on success, and also when the request fails only because the
 * function is already gone or its IRQs are already deregistered; -EIO for
 * every other failure.
 */
static int zpci_clear_airq(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u8 cc, status;
	u64 req;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
	fib.gd = zdev->gisa;

	cc = zpci_mod_fc(req, &fib, &status);
	if (!cc)
		return 0;

	/* Function already gone or IRQs already deregistered. */
	if (cc == 3)
		return 0;
	if (cc == 1 && status == 24)
		return 0;

	return -EIO;
}

/*
 * Modify PCI: Register CPU directed interruptions
 *
 * Builds a format-1 FIB from the function's first vector bit and its
 * number of interrupts, then issues the ZPCI_MOD_FC_REG_INT_D request.
 *
 * Returns 0 on success or -EIO if zpci_mod_fc() reports a non-zero result.
 */
static int zpci_set_directed_irq(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u64 req;
	u8 status;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT_D);

	fib.fmt = 1;
	fib.fmt1.noi = zdev->msi_nr_irqs;
	fib.fmt1.dibvo = zdev->msi_first_bit;
	fib.gd = zdev->gisa;

	if (zpci_mod_fc(req, &fib, &status))
		return -EIO;

	return 0;
}

/*
 * Modify PCI: Unregister CPU directed interruptions
 *
 * Issues the ZPCI_MOD_FC_DEREG_INT_D request with a format-1 FIB.
 *
 * Returns 0 on success, and also when the request fails only because the
 * function is already gone or its IRQs are already deregistered; -EIO for
 * every other failure.
 */
static int zpci_clear_directed_irq(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u8 cc, status;
	u64 req;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT_D);
	fib.fmt = 1;
	fib.gd = zdev->gisa;

	cc = zpci_mod_fc(req, &fib, &status);
	if (!cc)
		return 0;

	/* Function already gone or IRQs already deregistered. */
	if (cc == 3)
		return 0;
	if (cc == 1 && status == 24)
		return 0;

	return -EIO;
}

/*
 * Register adapter interruptions
 *
 * Dispatches to the directed or floating registration routine according
 * to irq_delivery and records success in zdev->irqs_registered.
 *
 * Returns 0 on success or the error code of the registration routine.
 */
static int zpci_set_irq(struct zpci_dev *zdev)
{
	int rc = (irq_delivery == DIRECTED) ? zpci_set_directed_irq(zdev)
					    : zpci_set_airq(zdev);

	if (rc)
		return rc;

	zdev->irqs_registered = 1;
	return 0;
}

/*
 * Clear adapter interruptions
 *
 * Dispatches to the directed or floating deregistration routine according
 * to irq_delivery and resets zdev->irqs_registered on success.
 *
 * Returns 0 on success or the error code of the deregistration routine.
 */
static int zpci_clear_irq(struct zpci_dev *zdev)
{
	int rc = (irq_delivery == DIRECTED) ? zpci_clear_directed_irq(zdev)
					    : zpci_clear_airq(zdev);

	if (rc)
		return rc;

	zdev->irqs_registered = 0;
	return 0;
}

/*
 * irq_chip affinity callback, installed by zpci_directed_irq_init().
 *
 * Takes the first CPU of @dest, places its CPU address into bits 8..23 of
 * the low MSI address word and writes the updated message for the IRQ.
 * @force is not evaluated.
 *
 * Always returns IRQ_SET_MASK_OK.
 */
static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
				 bool force)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);
	struct msi_msg new_msg = desc->msg;
	int target = smp_cpu_get_cpu_address(cpumask_first(dest));

	/* Keep everything but the CPU address field, then insert the new one. */
	new_msg.address_lo = (new_msg.address_lo & 0xff0000ff) | (target << 8);
	pci_write_msi_msg(data->irq, &new_msg);

	return IRQ_SET_MASK_OK;
}

/*
 * irq_chip used for all zPCI MSI interrupts. The irq_set_affinity callback
 * is filled in at init time by zpci_directed_irq_init() only.
 */
static struct irq_chip zpci_irq_chip = {
	.name		= "PCI-MSI",
	.irq_mask	= pci_msi_mask_irq,
	.irq_unmask	= pci_msi_unmask_irq,
};

/*
 * Handle all pending bits in the directed interrupt vector of this CPU.
 *
 * @rescan: when true, interrupts are re-enabled via zpci_set_irq_ctrl()
 *          once the vector has been drained and the vector is scanned a
 *          second time; when false, a single pass is done and interrupts
 *          are not re-enabled here.
 */
static void zpci_handle_cpu_local_irq(bool rescan)
{
	struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
	union zpci_sic_iib iib = {{0}};
	unsigned long pos = 0;
	bool reenabled = false;

	for (;;) {
		/* Scan the directed IRQ bit vector */
		pos = airq_iv_scan(dibv, pos, airq_iv_end(dibv));
		if (pos != -1UL) {
			inc_irq_stat(IRQIO_MSI);
			generic_handle_irq(airq_iv_get_data(dibv, pos));
			continue;
		}

		/* Done after a single pass, or after the second scan. */
		if (!rescan || reenabled)
			break;

		/* First scan complete, re-enable interrupts. */
		if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
			break;
		reenabled = true;
		pos = 0;
	}
}

/*
 * Per-CPU state used by the fallback path to forward interrupt handling
 * to another CPU.
 *
 * @csd:       call descriptor handed to smp_call_function_single_async()
 * @scheduled: incremented by zpci_handle_fallback_irq() for every forward
 *             request and decremented by zpci_handle_remote_irq() after
 *             each pass over the CPU's vector
 */
struct cpu_irq_data {
	call_single_data_t csd;
	atomic_t scheduled;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_irq_data, irq_data);

/*
 * Cross-CPU callback set up by zpci_handle_fallback_irq().
 *
 * @data: pointer to the atomic request counter of this CPU
 *
 * Scans the local vector once per outstanding request: after each pass
 * the counter is decremented and scanning stops when it reaches zero.
 */
static void zpci_handle_remote_irq(void *data)
{
	atomic_t *pending = data;

	for (;;) {
		zpci_handle_cpu_local_irq(false);
		if (!atomic_dec_return(pending))
			break;
	}
}

static void zpci_handle_fallback_irq(void)
{
	struct cpu_irq_data *cpu_data;
	union zpci_sic_iib iib = {{0}};
	unsigned long cpu;
	int irqs_on = 0;

	for (cpu = 0;;) {
		cpu = airq_iv_scan(zpci_sbv, cpu, airq_iv_end(zpci_sbv));
		if (cpu == -1UL) {
			if (irqs_on++)
				/* End of second scan with interrupts on. */
				break;
			/* First scan complete, re-enable interrupts. */
			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
				break;
			cpu = 0;
			continue;
		}
		cpu_data = &per_cpu(irq_data, cpu);
		if (atomic_inc_return(&cpu_data->scheduled) > 1)
			continue;

		INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled);
		smp_call_function_single_async(cpu, &cpu_data->csd);
	}
}

/*
 * Adapter interrupt handler used in DIRECTED mode.
 *
 * A directed interrupt is handled on the local CPU vector (with rescan);
 * anything else is treated as floating and goes through the fallback
 * path. @airq is not evaluated.
 */
static void zpci_directed_irq_handler(struct airq_struct *airq,
				      struct tpi_info *tpi_info)
{
	if (tpi_info->directed_irq) {
		inc_irq_stat(IRQIO_PCD);
		zpci_handle_cpu_local_irq(true);
		return;
	}

	inc_irq_stat(IRQIO_PCF);
	zpci_handle_fallback_irq();
}

static void zpci_floating_irq_handler(struct airq_struct *airq,
				      struct tpi_info *tpi_info)
{
	union zpci_sic_iib iib = {{0}};
	unsigned long si, ai;
	struct airq_iv *aibv;
	int irqs_on = 0;

	inc_irq_stat(IRQIO_PCF);
	for (si = 0;;) {
		/* Scan adapter summary indicator bit vector */
		si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv));
		if (si == -1UL) {
			if (irqs_on++)
				/* End of second scan with interrupts on. */
				break;
			/* First scan complete, re-enable interrupts. */
			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
				break;
			si = 0;
			continue;
		}

		/* Scan the adapter interrupt vector for this device. */
		aibv = zpci_ibv[si];
		for (ai = 0;;) {
			ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
			if (ai == -1UL)
				break;
			inc_irq_stat(IRQIO_MSI);
			airq_iv_lock(aibv, ai);
			generic_handle_irq(airq_iv_get_data(aibv, ai));
			airq_iv_unlock(aibv, ai);
		}
	}
}

/*
 * Allocate the interrupt vector resources for a function.
 *
 * @zdev:     function to allocate for
 * @msi_vecs: number of interrupt vectors needed
 * @bit:      out: first vector bit usable by the function
 *
 * DIRECTED: reserves @msi_vecs consecutive bits in the first per-CPU
 * vector and reports the first one in @bit.
 * FLOATING: reserves a summary bit, creates a private interrupt vector
 * for the function, publishes it in zpci_ibv[] and reports 0 in @bit.
 *
 * Returns 0 on success, -EIO if no bit could be allocated, or -ENOMEM if
 * the interrupt vector could not be created. The summary bit taken in the
 * FLOATING case is not released here on -ENOMEM; it stays recorded in
 * zdev->aisb.
 */
static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
			unsigned long *bit)
{
	if (irq_delivery == DIRECTED) {
		/* Allocate cpu vector bits */
		*bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
		return (*bit == -1UL) ? -EIO : 0;
	}

	/* Allocate adapter summary indicator bit */
	*bit = airq_iv_alloc_bit(zpci_sbv);
	if (*bit == -1UL)
		return -EIO;
	zdev->aisb = *bit;

	/* Create adapter interrupt vector */
	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
	if (!zdev->aibv)
		return -ENOMEM;

	/* Wire up shortcut pointer */
	zpci_ibv[*bit] = zdev->aibv;

	/* Each function has its own interrupt vector */
	*bit = 0;

	return 0;
}

/*
 * Set up the MSI/MSI-X interrupts of a PCI function.
 *
 * @pdev: PCI device to set up
 * @nvec: number of interrupt vectors requested
 * @type: not evaluated here
 *
 * The request is capped at zdev->max_msi. Vector bits are allocated via
 * __alloc_airq(), Linux IRQ descriptors are allocated and bound to the
 * device's MSI descriptors, the MSI messages are written, and finally the
 * interrupts are registered through zpci_set_irq().
 *
 * Returns 0 if exactly @nvec interrupts were set up, the positive number
 * of interrupts actually set up if that differs from @nvec, or a negative
 * errno on failure.
 *
 * NOTE(review): the early error returns below do not release what has
 * been allocated so far; presumably the caller invokes
 * arch_teardown_msi_irqs() on failure - confirm against the MSI core.
 */
int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
	unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
	struct zpci_dev *zdev = to_zpci(pdev);
	struct msi_desc *msi;
	struct msi_msg msg;
	unsigned long bit;
	int cpu_addr;
	int rc, irq;

	/* Mark "nothing allocated" so that teardown can tell. */
	zdev->aisb = -1UL;
	zdev->msi_first_bit = -1U;

	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
	if (msi_vecs < nvec) {
		pr_info("%s requested %d irqs, allocate system limit of %d\n",
			pci_name(pdev), nvec, zdev->max_msi);
	}

	rc = __alloc_airq(zdev, msi_vecs, &bit);
	if (rc < 0)
		return rc;

	/*
	 * Request MSI interrupts:
	 * When using MSI, nvec_used interrupt sources and their irq
	 * descriptors are controlled through one msi descriptor.
	 * Thus the outer loop over msi descriptors shall run only once,
	 * while two inner loops iterate over the interrupt vectors.
	 * When using MSI-X, each interrupt vector/irq descriptor
	 * is bound to exactly one msi descriptor (nvec_used is one).
	 * So the inner loops are executed once, while the outer iterates
	 * over the MSI-X descriptors.
	 */
	hwirq = bit;
	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
		/* Stop once the capped number of vectors has been handed out. */
		if (hwirq - bit >= msi_vecs)
			break;
		irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
		/* Affinity hints are only passed on for directed delivery. */
		irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
					(irq_delivery == DIRECTED) ?
					msi->affinity : NULL);
		if (irq < 0)
			return -ENOMEM;

		for (i = 0; i < irqs_per_msi; i++) {
			rc = irq_set_msi_desc_off(irq, i, msi);
			if (rc)
				return rc;
			irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
						 handle_percpu_irq);
		}

		/* The message data carries the vector offset of this descriptor. */
		msg.data = hwirq - bit;
		if (irq_delivery == DIRECTED) {
			/* Target the first CPU of the affinity mask, CPU 0 if none. */
			if (msi->affinity)
				cpu = cpumask_first(&msi->affinity->mask);
			else
				cpu = 0;
			cpu_addr = smp_cpu_get_cpu_address(cpu);

			msg.address_lo = zdev->msi_addr & 0xff0000ff;
			msg.address_lo |= (cpu_addr << 8);

			/* Record the Linux IRQ number in every per-CPU vector. */
			for_each_possible_cpu(cpu) {
				for (i = 0; i < irqs_per_msi; i++)
					airq_iv_set_data(zpci_ibv[cpu],
							 hwirq + i, irq + i);
			}
		} else {
			msg.address_lo = zdev->msi_addr & 0xffffffff;
			for (i = 0; i < irqs_per_msi; i++)
				airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
		}
		msg.address_hi = zdev->msi_addr >> 32;
		pci_write_msi_msg(irq, &msg);
		hwirq += irqs_per_msi;
	}

	zdev->msi_first_bit = bit;
	zdev->msi_nr_irqs = hwirq - bit;

	rc = zpci_set_irq(zdev);
	if (rc)
		return rc;

	return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}

void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
	struct zpci_dev *zdev = to_zpci(pdev);
	struct msi_desc *msi;
	unsigned int i;
	int rc;

	/* Disable interrupts */
	rc = zpci_clear_irq(zdev);
	if (rc)
		return;

	/* Release MSI interrupts */
	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
		for (i = 0; i < msi->nvec_used; i++) {
			irq_set_msi_desc(msi->irq + i, NULL);
			irq_free_desc(msi->irq + i);
		}
		msi->msg.address_lo = 0;
		msi->msg.address_hi = 0;
		msi->msg.data = 0;
		msi->irq = 0;
	}

	if (zdev->aisb != -1UL) {
		zpci_ibv[zdev->aisb] = NULL;
		airq_iv_free_bit(zpci_sbv, zdev->aisb);
		zdev->aisb = -1UL;
	}
	if (zdev->aibv) {
		airq_iv_release(zdev->aibv);
		zdev->aibv = NULL;
	}

	if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
		airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
}

/*
 * Re-register the function's interrupts if they are currently not
 * registered. The result of the registration is not evaluated.
 *
 * Always returns true.
 */
bool arch_restore_msi_irqs(struct pci_dev *pdev)
{
	struct zpci_dev *zdev = to_zpci(pdev);

	if (zdev->irqs_registered)
		return true;

	zpci_set_irq(zdev);
	return true;
}

/*
 * Adapter interrupt registration for PCI. The handler defaults to the
 * floating variant and is replaced in zpci_irq_init() when directed
 * delivery is used.
 */
static struct airq_struct zpci_airq = {
	.isc		= PCI_ISC,
	.handler	= zpci_floating_irq_handler,
};

/*
 * Per-CPU init callback, run on every CPU by zpci_directed_irq_init().
 *
 * Passes the physical address of the calling CPU's interrupt bit vector
 * via SIC_IRQ_MODE_SET_CPU and then issues SIC_IRQ_MODE_D_SINGLE for the
 * PCI ISC. The results of both calls are not evaluated.
 */
static void __init cpu_enable_directed_irq(void *unused)
{
	union zpci_sic_iib cpu_iib = {{0}};
	union zpci_sic_iib empty_iib = {{0}};
	struct airq_iv *dibv = zpci_ibv[smp_processor_id()];

	cpu_iib.cdiib.dibv_addr = virt_to_phys(dibv->vector);

	zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &cpu_iib);
	zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &empty_iib);
}

/*
 * Initialize directed interrupt delivery.
 *
 * Creates the per-CPU summary bit vector and passes its address via
 * SIC_IRQ_MODE_DIRECT, creates one interrupt bit vector per possible CPU,
 * enables directed delivery on every CPU and installs the affinity
 * callback in zpci_irq_chip.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails. On failure,
 * nothing that was allocated before is released by this function.
 */
static int __init zpci_directed_irq_init(void)
{
	unsigned int nr_cpus = num_possible_cpus();
	union zpci_sic_iib iib = {{0}};
	unsigned long iv_flags;
	unsigned int cpu;

	zpci_sbv = airq_iv_create(nr_cpus, 0, NULL);
	if (!zpci_sbv)
		return -ENOMEM;

	iib.diib.isc = PCI_ISC;
	iib.diib.nr_cpus = nr_cpus;
	iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
	zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);

	zpci_ibv = kcalloc(nr_cpus, sizeof(*zpci_ibv), GFP_KERNEL);
	if (!zpci_ibv)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		/*
		 * Per CPU IRQ vectors look the same but bit-allocation
		 * is only done on the first vector.
		 */
		iv_flags = AIRQ_IV_DATA | AIRQ_IV_CACHELINE;
		if (cpu == 0)
			iv_flags |= AIRQ_IV_ALLOC;

		zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
					       iv_flags, NULL);
		if (!zpci_ibv[cpu])
			return -ENOMEM;
	}
	on_each_cpu(cpu_enable_directed_irq, NULL, 1);

	zpci_irq_chip.irq_set_affinity = zpci_set_irq_affinity;

	return 0;
}

/*
 * Initialize floating interrupt delivery.
 *
 * Allocates the table of per-function interrupt vector pointers and
 * creates the summary bit vector, both sized for ZPCI_NR_DEVICES
 * functions.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails; in that case
 * zpci_ibv is left NULL.
 */
static int __init zpci_floating_irq_init(void)
{
	zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL);
	if (!zpci_ibv)
		return -ENOMEM;

	zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
	if (!zpci_sbv)
		goto out_free;

	return 0;

out_free:
	kfree(zpci_ibv);
	/*
	 * Reset the file-scope pointer so that no dangling reference stays
	 * behind; a later kfree() or NULL check on it remains harmless.
	 */
	zpci_ibv = NULL;
	return -ENOMEM;
}

/*
 * Initialize zPCI interrupt handling.
 *
 * Selects the delivery mode (directed only if sclp.has_dirq is set and
 * s390_pci_force_floating is not), registers the adapter interrupt,
 * runs the mode specific initialization and finally enables floating
 * interrupts.
 *
 * Returns 0 on success or a negative error code; the adapter interrupt
 * is unregistered again if the mode specific initialization fails.
 */
int __init zpci_irq_init(void)
{
	union zpci_sic_iib iib = {{0}};
	int rc;

	if (sclp.has_dirq && !s390_pci_force_floating)
		irq_delivery = DIRECTED;
	else
		irq_delivery = FLOATING;

	if (irq_delivery == DIRECTED)
		zpci_airq.handler = zpci_directed_irq_handler;

	rc = register_adapter_interrupt(&zpci_airq);
	if (rc)
		return rc;

	/* Set summary to 1 to be called every time for the ISC. */
	*zpci_airq.lsi_ptr = 1;

	if (irq_delivery == DIRECTED)
		rc = zpci_directed_irq_init();
	else
		rc = zpci_floating_irq_init();

	if (rc) {
		unregister_adapter_interrupt(&zpci_airq);
		return rc;
	}

	/*
	 * Enable floating IRQs (with suppression after one IRQ). When using
	 * directed IRQs this enables the fallback path.
	 */
	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);

	return 0;
}

/*
 * Undo zpci_irq_init(): release the per-CPU interrupt vectors (directed
 * delivery only), the vector pointer table and the summary bit vector,
 * then unregister the adapter interrupt.
 */
void __init zpci_irq_exit(void)
{
	unsigned int cpu;

	/* Only directed delivery owns the vectors referenced by the table. */
	if (irq_delivery == DIRECTED)
		for_each_possible_cpu(cpu)
			airq_iv_release(zpci_ibv[cpu]);

	kfree(zpci_ibv);

	if (zpci_sbv)
		airq_iv_release(zpci_sbv);

	unregister_adapter_interrupt(&zpci_airq);
}