Contributors: 3
Author Tokens Token Proportion Commits Commit Proportion
Sukadev Bhattiprolu 5212 99.67% 21 91.30%
Matthew Wilcox 10 0.19% 1 4.35%
Nicholas Piggin 7 0.13% 1 4.35%
Total 5229 23


/*
 * Copyright 2016-17 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "vas: " fmt

#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/cred.h>
#include <asm/switch_to.h>
#include <asm/ppc-opcode.h>
#include "vas.h"
#include "copy-paste.h"

#define CREATE_TRACE_POINTS
#include "vas-trace.h"

/*
 * Compute the paste address region for the window @window using the
 * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
 */
static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
{
	int winid;
	u64 base, shift;

	base = window->vinst->paste_base_addr;
	shift = window->vinst->paste_win_id_shift;
	winid = window->winid;

	*addr  = base + (winid << shift);
	if (len)
		*len = PAGE_SIZE;

	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
}

u64 vas_win_paste_addr(struct vas_window *win)
{
	u64 addr;

	compute_paste_address(win, &addr, NULL);

	return addr;
}
EXPORT_SYMBOL(vas_win_paste_addr);

static inline void get_hvwc_mmio_bar(struct vas_window *window,
			u64 *start, int *len)
{
	u64 pbaddr;

	pbaddr = window->vinst->hvwc_bar_start;
	*start = pbaddr + window->winid * VAS_HVWC_SIZE;
	*len = VAS_HVWC_SIZE;
}

static inline void get_uwc_mmio_bar(struct vas_window *window,
			u64 *start, int *len)
{
	u64 pbaddr;

	pbaddr = window->vinst->uwc_bar_start;
	*start = pbaddr + window->winid * VAS_UWC_SIZE;
	*len = VAS_UWC_SIZE;
}

/*
 * Map the paste bus address of the given send window into kernel address
 * space. Unlike MMIO regions (map_mmio_region() below), paste region must
 * be mapped cache-able and is only applicable to send windows.
 */
static void *map_paste_region(struct vas_window *txwin)
{
	int len;
	void *map;
	char *name;
	u64 start;

	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
				txwin->winid);
	if (!name)
		goto free_name;

	txwin->paste_addr_name = name;
	compute_paste_address(txwin, &start, &len);

	if (!request_mem_region(start, len, name)) {
		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
				__func__, start, len);
		goto free_name;
	}

	map = ioremap_cache(start, len);
	if (!map) {
		pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
				start, len);
		goto free_name;
	}

	pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
	return map;

free_name:
	kfree(name);
	return ERR_PTR(-ENOMEM);
}

static void *map_mmio_region(char *name, u64 start, int len)
{
	void *map;

	if (!request_mem_region(start, len, name)) {
		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
				__func__, start, len);
		return NULL;
	}

	map = ioremap(start, len);
	if (!map) {
		pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
				len);
		return NULL;
	}

	return map;
}

static void unmap_region(void *addr, u64 start, int len)
{
	iounmap(addr);
	release_mem_region((phys_addr_t)start, len);
}

/*
 * Unmap the paste address region for a window.
 */
static void unmap_paste_region(struct vas_window *window)
{
	int len;
	u64 busaddr_start;

	if (window->paste_kaddr) {
		compute_paste_address(window, &busaddr_start, &len);
		unmap_region(window->paste_kaddr, busaddr_start, len);
		window->paste_kaddr = NULL;
		kfree(window->paste_addr_name);
		window->paste_addr_name = NULL;
	}
}

/*
 * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
 * unmap when the window's debugfs dir is in use. This serializes close
 * of a window even on another VAS instance but since its not a critical
 * path, just minimize the time we hold the mutex for now. We can add
 * a per-instance mutex later if necessary.
 */
static void unmap_winctx_mmio_bars(struct vas_window *window)
{
	int len;
	void *uwc_map;
	void *hvwc_map;
	u64 busaddr_start;

	mutex_lock(&vas_mutex);

	hvwc_map = window->hvwc_map;
	window->hvwc_map = NULL;

	uwc_map = window->uwc_map;
	window->uwc_map = NULL;

	mutex_unlock(&vas_mutex);

	if (hvwc_map) {
		get_hvwc_mmio_bar(window, &busaddr_start, &len);
		unmap_region(hvwc_map, busaddr_start, len);
	}

	if (uwc_map) {
		get_uwc_mmio_bar(window, &busaddr_start, &len);
		unmap_region(uwc_map, busaddr_start, len);
	}
}

/*
 * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
 * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
 * Map these bus addresses and save the mapped kernel addresses in @window.
 */
int map_winctx_mmio_bars(struct vas_window *window)
{
	int len;
	u64 start;

	get_hvwc_mmio_bar(window, &start, &len);
	window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);

	get_uwc_mmio_bar(window, &start, &len);
	window->uwc_map = map_mmio_region("UWCM_Window", start, len);

	if (!window->hvwc_map || !window->uwc_map) {
		unmap_winctx_mmio_bars(window);
		return -1;
	}

	return 0;
}

/*
 * Reset all valid registers in the HV and OS/User Window Contexts for
 * the window identified by @window.
 *
 * NOTE: We cannot really use a for loop to reset window context. Not all
 *	 offsets in a window context are valid registers and the valid
 *	 registers are not sequential. And, we can only write to offsets
 *	 with valid registers.
 */
void reset_window_regs(struct vas_window *window)
{
	write_hvwc_reg(window, VREG(LPID), 0ULL);
	write_hvwc_reg(window, VREG(PID), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
	write_hvwc_reg(window, VREG(AMR), 0ULL);
	write_hvwc_reg(window, VREG(SEIDR), 0ULL);
	write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
	write_hvwc_reg(window, VREG(PSWID), 0ULL);
	write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
	write_hvwc_reg(window, VREG(WINCTL), 0ULL);
	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
	write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);

	/* Skip read-only registers: NX_UTIL and NX_UTIL_SE */

	/*
	 * The send and receive window credit adder registers are also
	 * accessible from HVWC and have been initialized above. We don't
	 * need to initialize from the OS/User Window Context, so skip
	 * following calls:
	 *
	 *	write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
	 *	write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	 */
}

/*
 * Initialize window context registers related to Address Translation.
 * These registers are common to send/receive windows although they
 * differ for user/kernel windows. As we resolve the TODOs we may
 * want to add fields to vas_winctx and move the initialization to
 * init_vas_winctx_regs().
 */
static void init_xlate_regs(struct vas_window *window, bool user_win)
{
	u64 lpcr, val;

	/*
	 * MSR_TA, MSR_US are false for both kernel and user.
	 * MSR_DR and MSR_PR are false for kernel.
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
	val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
	if (user_win) {
		val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
		val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
	}
	write_hvwc_reg(window, VREG(XLATE_MSR), val);

	lpcr = mfspr(SPRN_LPCR);
	val = 0ULL;
	/*
	 * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
	 *	 Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
	 *
	 * NOTE: From Section 1.3.1, Address Translation Context of the
	 *	 Nest MMU Workbook, LPCR_SC should be 0 for Power9.
	 */
	val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
	val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
	val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
	val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
	write_hvwc_reg(window, VREG(XLATE_LPCR), val);

	/*
	 * Section 1.3.1 (Address translation Context) of NMMU workbook.
	 *	0b00	Hashed Page Table mode
	 *	0b01	Reserved
	 *	0b10	Radix on HPT
	 *	0b11	Radix on Radix
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
	write_hvwc_reg(window, VREG(XLATE_CTL), val);

	/*
	 * TODO: Can we mfspr(AMR) even for user windows?
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
	write_hvwc_reg(window, VREG(AMR), val);

	val = 0ULL;
	val = SET_FIELD(VAS_SEIDR, val, 0);
	write_hvwc_reg(window, VREG(SEIDR), val);
}

/*
 * Initialize Reserved Send Buffer Count for the send window. It involves
 * writing to the register, reading it back to confirm that the hardware
 * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
 *
 * Since we can only make a best-effort attempt to fulfill the request,
 * we don't return any errors if we cannot.
 *
 * TODO: Reserved (aka dedicated) send buffers are not supported yet.
 */
static void init_rsvd_tx_buf_count(struct vas_window *txwin,
				struct vas_winctx *winctx)
{
	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
}

/*
 * init_winctx_regs()
 *	Initialize window context registers for a receive window.
 *	Except for caching control and marking window open, the registers
 *	are initialized in the order listed in Section 3.1.4 (Window Context
 *	Cache Register Details) of the VAS workbook although they don't need
 *	to be.
 *
 * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
 *	(so that it can get a large contiguous area) and passes that buffer
 *	to kernel via device tree. We now write that buffer address to the
 *	FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
 *	write the per-chip RX FIFO addresses to the windows during boot-up
 *	as a one-time task? That could work for NX but what about other
 *	receivers?  Let the receivers tell us the rx-fifo buffers for now.
 */
int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
{
	u64 val;
	int fifo_size;

	reset_window_regs(window);

	val = 0ULL;
	val = SET_FIELD(VAS_LPID, val, winctx->lpid);
	write_hvwc_reg(window, VREG(LPID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
	write_hvwc_reg(window, VREG(PID), val);

	init_xlate_regs(window, winctx->user_win);

	val = 0ULL;
	val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
	write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);

	/* In PowerNV, interrupts go to HV. */
	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);

	val = 0ULL;
	val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);

	val = 0ULL;
	val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
	write_hvwc_reg(window, VREG(PSWID), val);

	write_hvwc_reg(window, VREG(SPARE1), 0ULL);
	write_hvwc_reg(window, VREG(SPARE2), 0ULL);
	write_hvwc_reg(window, VREG(SPARE3), 0ULL);

	/*
	 * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
	 *	 register as is - do NOT shift the address into VAS_LFIFO_BAR
	 *	 bit fields! Ok to set the page migration select fields -
	 *	 VAS ignores the lower 10+ bits in the address anyway, because
	 *	 the minimum FIFO size is 1K?
	 *
	 * See also: Design note in function header.
	 */
	val = __pa(winctx->rx_fifo);
	val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
	write_hvwc_reg(window, VREG(LFIFO_BAR), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
	val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);

	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);

	val = 0ULL;
	val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
	write_hvwc_reg(window, VREG(LRX_WCRED), val);

	val = 0ULL;
	val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
	write_hvwc_reg(window, VREG(TX_WCRED), val);

	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);

	fifo_size = winctx->rx_fifo_size / 1024;

	val = 0ULL;
	val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
	write_hvwc_reg(window, VREG(LFIFO_SIZE), val);

	/* Update window control and caching control registers last so
	 * we mark the window open only after fully initializing it and
	 * pushing context to cache.
	 */

	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);

	init_rsvd_tx_buf_count(window, winctx);

	/* for a send window, point to the matching receive window */
	val = 0ULL;
	val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);

	write_hvwc_reg(window, VREG(SPARE4), 0ULL);

	val = 0ULL;
	val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
	val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
	val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
	val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
	write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
	write_hvwc_reg(window, VREG(LNOTIFY_PID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
	write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
	write_hvwc_reg(window, VREG(LNOTIFY_TID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
	val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);

	/* Skip read-only registers NX_UTIL and NX_UTIL_SE */

	write_hvwc_reg(window, VREG(SPARE5), 0ULL);
	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(SPARE6), 0ULL);

	/* Finally, push window context to memory and... */
	val = 0ULL;
	val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);

	/* ... mark the window open for business */
	val = 0ULL;
	val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
	val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
	val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
	val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
	val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
	val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
	val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
	val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
	val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
	write_hvwc_reg(window, VREG(WINCTL), val);

	return 0;
}

static void vas_release_window_id(struct ida *ida, int winid)
{
	ida_free(ida, winid);
}

static int vas_assign_window_id(struct ida *ida)
{
	int winid = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL);

	if (winid == -ENOSPC) {
		pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP);
		return -EAGAIN;
	}

	return winid;
}

static void vas_window_free(struct vas_window *window)
{
	int winid = window->winid;
	struct vas_instance *vinst = window->vinst;

	unmap_winctx_mmio_bars(window);

	vas_window_free_dbgdir(window);

	kfree(window);

	vas_release_window_id(&vinst->ida, winid);
}

static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
{
	int winid;
	struct vas_window *window;

	winid = vas_assign_window_id(&vinst->ida);
	if (winid < 0)
		return ERR_PTR(winid);

	window = kzalloc(sizeof(*window), GFP_KERNEL);
	if (!window)
		goto out_free;

	window->vinst = vinst;
	window->winid = winid;

	if (map_winctx_mmio_bars(window))
		goto out_free;

	vas_window_init_dbgdir(window);

	return window;

out_free:
	kfree(window);
	vas_release_window_id(&vinst->ida, winid);
	return ERR_PTR(-ENOMEM);
}

static void put_rx_win(struct vas_window *rxwin)
{
	/* Better not be a send window! */
	WARN_ON_ONCE(rxwin->tx_win);

	atomic_dec(&rxwin->num_txwins);
}

/*
 * Find the user space receive window given the @pswid.
 *      - We must have a valid vasid and it must belong to this instance.
 *        (so both send and receive windows are on the same VAS instance)
 *      - The window must refer to an OPEN, FTW, RECEIVE window.
 *
 * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
 */
static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
{
	int vasid, winid;
	struct vas_window *rxwin;

	decode_pswid(pswid, &vasid, &winid);

	if (vinst->vas_id != vasid)
		return ERR_PTR(-EINVAL);

	rxwin = vinst->windows[winid];

	if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
		return ERR_PTR(-EINVAL);

	return rxwin;
}

/*
 * Get the VAS receive window associated with NX engine identified
 * by @cop and if applicable, @pswid.
 *
 * See also function header of set_vinst_win().
 */
static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
			enum vas_cop_type cop, u32 pswid)
{
	struct vas_window *rxwin;

	mutex_lock(&vinst->mutex);

	if (cop == VAS_COP_TYPE_FTW)
		rxwin = get_user_rxwin(vinst, pswid);
	else
		rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);

	if (!IS_ERR(rxwin))
		atomic_inc(&rxwin->num_txwins);

	mutex_unlock(&vinst->mutex);

	return rxwin;
}

/*
 * We have two tables of windows in a VAS instance. The first one,
 * ->windows[], contains all the windows in the instance and allows
 * looking up a window by its id. It is used to look up send windows
 * during fault handling and receive windows when pairing user space
 * send/receive windows.
 *
 * The second table, ->rxwin[], contains receive windows that are
 * associated with NX engines. This table has VAS_COP_TYPE_MAX
 * entries and is used to look up a receive window by its
 * coprocessor type.
 *
 * Here, we save @window in the ->windows[] table. If it is a receive
 * window, we also save the window in the ->rxwin[] table.
 */
static void set_vinst_win(struct vas_instance *vinst,
			struct vas_window *window)
{
	int id = window->winid;

	mutex_lock(&vinst->mutex);

	/*
	 * There should only be one receive window for a coprocessor type
	 * unless its a user (FTW) window.
	 */
	if (!window->user_win && !window->tx_win) {
		WARN_ON_ONCE(vinst->rxwin[window->cop]);
		vinst->rxwin[window->cop] = window;
	}

	WARN_ON_ONCE(vinst->windows[id] != NULL);
	vinst->windows[id] = window;

	mutex_unlock(&vinst->mutex);
}

/*
 * Clear this window from the table(s) of windows for this VAS instance.
 * See also function header of set_vinst_win().
 */
static void clear_vinst_win(struct vas_window *window)
{
	int id = window->winid;
	struct vas_instance *vinst = window->vinst;

	mutex_lock(&vinst->mutex);

	if (!window->user_win && !window->tx_win) {
		WARN_ON_ONCE(!vinst->rxwin[window->cop]);
		vinst->rxwin[window->cop] = NULL;
	}

	WARN_ON_ONCE(vinst->windows[id] != window);
	vinst->windows[id] = NULL;

	mutex_unlock(&vinst->mutex);
}

static void init_winctx_for_rxwin(struct vas_window *rxwin,
			struct vas_rx_win_attr *rxattr,
			struct vas_winctx *winctx)
{
	/*
	 * We first zero (memset()) all fields and only set non-zero fields.
	 * Following fields are 0/false but maybe deserve a comment:
	 *
	 *	->notify_os_intr_reg	In powerNV, send intrs to HV
	 *	->notify_disable	False for NX windows
	 *	->intr_disable		False for Fault Windows
	 *	->xtra_write		False for NX windows
	 *	->notify_early		NA for NX windows
	 *	->rsvd_txbuf_count	NA for Rx windows
	 *	->lpid, ->pid, ->tid	NA for Rx windows
	 */

	memset(winctx, 0, sizeof(struct vas_winctx));

	winctx->rx_fifo = rxattr->rx_fifo;
	winctx->rx_fifo_size = rxattr->rx_fifo_size;
	winctx->wcreds_max = rxwin->wcreds_max;
	winctx->pin_win = rxattr->pin_win;

	winctx->nx_win = rxattr->nx_win;
	winctx->fault_win = rxattr->fault_win;
	winctx->user_win = rxattr->user_win;
	winctx->rej_no_credit = rxattr->rej_no_credit;
	winctx->rx_word_mode = rxattr->rx_win_ord_mode;
	winctx->tx_word_mode = rxattr->tx_win_ord_mode;
	winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
	winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
	winctx->notify_early = rxattr->notify_early;

	if (winctx->nx_win) {
		winctx->data_stamp = true;
		winctx->intr_disable = true;
		winctx->pin_win = true;

		WARN_ON_ONCE(winctx->fault_win);
		WARN_ON_ONCE(!winctx->rx_word_mode);
		WARN_ON_ONCE(!winctx->tx_word_mode);
		WARN_ON_ONCE(winctx->notify_after_count);
	} else if (winctx->fault_win) {
		winctx->notify_disable = true;
	} else if (winctx->user_win) {
		/*
		 * Section 1.8.1 Low Latency Core-Core Wake up of
		 * the VAS workbook:
		 *
		 *      - disable credit checks ([tr]x_wcred_mode = false)
		 *      - disable FIFO writes
		 *      - enable ASB_Notify, disable interrupt
		 */
		winctx->fifo_disable = true;
		winctx->intr_disable = true;
		winctx->rx_fifo = NULL;
	}

	winctx->lnotify_lpid = rxattr->lnotify_lpid;
	winctx->lnotify_pid = rxattr->lnotify_pid;
	winctx->lnotify_tid = rxattr->lnotify_tid;
	winctx->pswid = rxattr->pswid;
	winctx->dma_type = VAS_DMA_TYPE_INJECT;
	winctx->tc_mode = rxattr->tc_mode;

	winctx->min_scope = VAS_SCOPE_LOCAL;
	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
}

static bool rx_win_args_valid(enum vas_cop_type cop,
			struct vas_rx_win_attr *attr)
{
	pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
			attr->fault_win, attr->notify_disable,
			attr->intr_disable, attr->notify_early,
			attr->rx_fifo_size);

	if (cop >= VAS_COP_TYPE_MAX)
		return false;

	if (cop != VAS_COP_TYPE_FTW &&
				attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
		return false;

	if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
		return false;

	if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
		return false;

	if (attr->nx_win) {
		/* cannot be fault or user window if it is nx */
		if (attr->fault_win || attr->user_win)
			return false;
		/*
		 * Section 3.1.4.32: NX Windows must not disable notification,
		 *	and must not enable interrupts or early notification.
		 */
		if (attr->notify_disable || !attr->intr_disable ||
				attr->notify_early)
			return false;
	} else if (attr->fault_win) {
		/* cannot be both fault and user window */
		if (attr->user_win)
			return false;

		/*
		 * Section 3.1.4.32: Fault windows must disable notification
		 *	but not interrupts.
		 */
		if (!attr->notify_disable || attr->intr_disable)
			return false;

	} else if (attr->user_win) {
		/*
		 * User receive windows are only for fast-thread-wakeup
		 * (FTW). They don't need a FIFO and must disable interrupts
		 */
		if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable)
			return false;
	} else {
		/* Rx window must be one of NX or Fault or User window. */
		return false;
	}

	return true;
}

void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
{
	memset(rxattr, 0, sizeof(*rxattr));

	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
		rxattr->pin_win = true;
		rxattr->nx_win = true;
		rxattr->fault_win = false;
		rxattr->intr_disable = true;
		rxattr->rx_wcred_mode = true;
		rxattr->tx_wcred_mode = true;
		rxattr->rx_win_ord_mode = true;
		rxattr->tx_win_ord_mode = true;
	} else if (cop == VAS_COP_TYPE_FAULT) {
		rxattr->pin_win = true;
		rxattr->fault_win = true;
		rxattr->notify_disable = true;
		rxattr->rx_wcred_mode = true;
		rxattr->tx_wcred_mode = true;
		rxattr->rx_win_ord_mode = true;
		rxattr->tx_win_ord_mode = true;
	} else if (cop == VAS_COP_TYPE_FTW) {
		rxattr->user_win = true;
		rxattr->intr_disable = true;

		/*
		 * As noted in the VAS Workbook we disable credit checks.
		 * If we enable credit checks in the future, we must also
		 * implement a mechanism to return the user credits or new
		 * paste operations will fail.
		 */
	}
}
EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);

struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
			struct vas_rx_win_attr *rxattr)
{
	struct vas_window *rxwin;
	struct vas_winctx winctx;
	struct vas_instance *vinst;

	trace_vas_rx_win_open(current, vasid, cop, rxattr);

	if (!rx_win_args_valid(cop, rxattr))
		return ERR_PTR(-EINVAL);

	vinst = find_vas_instance(vasid);
	if (!vinst) {
		pr_devel("vasid %d not found!\n", vasid);
		return ERR_PTR(-EINVAL);
	}
	pr_devel("Found instance %d\n", vasid);

	rxwin = vas_window_alloc(vinst);
	if (IS_ERR(rxwin)) {
		pr_devel("Unable to allocate memory for Rx window\n");
		return rxwin;
	}

	rxwin->tx_win = false;
	rxwin->nx_win = rxattr->nx_win;
	rxwin->user_win = rxattr->user_win;
	rxwin->cop = cop;
	rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
	if (rxattr->user_win)
		rxwin->pid = task_pid_vnr(current);

	init_winctx_for_rxwin(rxwin, rxattr, &winctx);
	init_winctx_regs(rxwin, &winctx);

	set_vinst_win(vinst, rxwin);

	return rxwin;
}
EXPORT_SYMBOL_GPL(vas_rx_win_open);

void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
{
	memset(txattr, 0, sizeof(*txattr));

	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
		txattr->rej_no_credit = false;
		txattr->rx_wcred_mode = true;
		txattr->tx_wcred_mode = true;
		txattr->rx_win_ord_mode = true;
		txattr->tx_win_ord_mode = true;
	} else if (cop == VAS_COP_TYPE_FTW) {
		txattr->user_win = true;
	}
}
EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);

static void init_winctx_for_txwin(struct vas_window *txwin,
			struct vas_tx_win_attr *txattr,
			struct vas_winctx *winctx)
{
	/*
	 * We first zero all fields and only set non-zero ones. Following
	 * are some fields set to 0/false for the stated reason:
	 *
	 *	->notify_os_intr_reg	In powernv, send intrs to HV
	 *	->rsvd_txbuf_count	Not supported yet.
	 *	->notify_disable	False for NX windows
	 *	->xtra_write		False for NX windows
	 *	->notify_early		NA for NX windows
	 *	->lnotify_lpid		NA for Tx windows
	 *	->lnotify_pid		NA for Tx windows
	 *	->lnotify_tid		NA for Tx windows
	 *	->tx_win_cred_mode	Ignore for now for NX windows
	 *	->rx_win_cred_mode	Ignore for now for NX windows
	 */
	memset(winctx, 0, sizeof(struct vas_winctx));

	winctx->wcreds_max = txwin->wcreds_max;

	winctx->user_win = txattr->user_win;
	winctx->nx_win = txwin->rxwin->nx_win;
	winctx->pin_win = txattr->pin_win;
	winctx->rej_no_credit = txattr->rej_no_credit;
	winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;

	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
	winctx->rx_word_mode = txattr->rx_win_ord_mode;
	winctx->tx_word_mode = txattr->tx_win_ord_mode;
	winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;

	winctx->intr_disable = true;
	if (winctx->nx_win)
		winctx->data_stamp = true;

	winctx->lpid = txattr->lpid;
	winctx->pidr = txattr->pidr;
	winctx->rx_win_id = txwin->rxwin->winid;

	winctx->dma_type = VAS_DMA_TYPE_INJECT;
	winctx->tc_mode = txattr->tc_mode;
	winctx->min_scope = VAS_SCOPE_LOCAL;
	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;

	winctx->pswid = 0;
}

static bool tx_win_args_valid(enum vas_cop_type cop,
			struct vas_tx_win_attr *attr)
{
	if (attr->tc_mode != VAS_THRESH_DISABLED)
		return false;

	if (cop > VAS_COP_TYPE_MAX)
		return false;

	if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
		return false;

	if (attr->user_win &&
			(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
		return false;

	return true;
}

struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
			struct vas_tx_win_attr *attr)
{
	int rc;
	struct vas_window *txwin;
	struct vas_window *rxwin;
	struct vas_winctx winctx;
	struct vas_instance *vinst;

	trace_vas_tx_win_open(current, vasid, cop, attr);

	if (!tx_win_args_valid(cop, attr))
		return ERR_PTR(-EINVAL);

	/*
	 * If caller did not specify a vasid but specified the PSWID of a
	 * receive window (applicable only to FTW windows), use the vasid
	 * from that receive window.
	 */
	if (vasid == -1 && attr->pswid)
		decode_pswid(attr->pswid, &vasid, NULL);

	vinst = find_vas_instance(vasid);
	if (!vinst) {
		pr_devel("vasid %d not found!\n", vasid);
		return ERR_PTR(-EINVAL);
	}

	rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
	if (IS_ERR(rxwin)) {
		pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
		return rxwin;
	}

	txwin = vas_window_alloc(vinst);
	if (IS_ERR(txwin)) {
		rc = PTR_ERR(txwin);
		goto put_rxwin;
	}

	txwin->cop = cop;
	txwin->tx_win = 1;
	txwin->rxwin = rxwin;
	txwin->nx_win = txwin->rxwin->nx_win;
	txwin->pid = attr->pid;
	txwin->user_win = attr->user_win;
	txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;

	init_winctx_for_txwin(txwin, attr, &winctx);

	init_winctx_regs(txwin, &winctx);

	/*
	 * If its a kernel send window, map the window address into the
	 * kernel's address space. For user windows, user must issue an
	 * mmap() to map the window into their address space.
	 *
	 * NOTE: If kernel ever resubmits a user CRB after handling a page
	 *	 fault, we will need to map this into kernel as well.
	 */
	if (!txwin->user_win) {
		txwin->paste_kaddr = map_paste_region(txwin);
		if (IS_ERR(txwin->paste_kaddr)) {
			rc = PTR_ERR(txwin->paste_kaddr);
			goto free_window;
		}
	} else {
		/*
		 * A user mapping must ensure that context switch issues
		 * CP_ABORT for this thread.
		 */
		rc = set_thread_uses_vas();
		if (rc)
			goto free_window;
	}

	set_vinst_win(vinst, txwin);

	return txwin;

free_window:
	vas_window_free(txwin);

put_rxwin:
	put_rx_win(rxwin);
	return ERR_PTR(rc);

}
EXPORT_SYMBOL_GPL(vas_tx_win_open);

int vas_copy_crb(void *crb, int offset)
{
	return vas_copy(crb, offset);
}
EXPORT_SYMBOL_GPL(vas_copy_crb);

#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
{
	int rc;
	void *addr;
	uint64_t val;

	trace_vas_paste_crb(current, txwin);

	/*
	 * Only NX windows are supported for now and hardware assumes
	 * report-enable flag is set for NX windows. Ensure software
	 * complies too.
	 */
	WARN_ON_ONCE(txwin->nx_win && !re);

	addr = txwin->paste_kaddr;
	if (re) {
		/*
		 * Set the REPORT_ENABLE bit (equivalent to writing
		 * to 1K offset of the paste address)
		 */
		val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
		addr += val;
	}

	/*
	 * Map the raw CR value from vas_paste() to an error code (there
	 * is just pass or fail for now though).
	 */
	rc = vas_paste(addr, offset);
	if (rc == 2)
		rc = 0;
	else
		rc = -EINVAL;

	pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
			read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));

	return rc;
}
EXPORT_SYMBOL_GPL(vas_paste_crb);

/*
 * If credit checking is enabled for this window, poll for the return
 * of window credits (i.e for NX engines to process any outstanding CRBs).
 * Since NX-842 waits for the CRBs to be processed before closing the
 * window, we should not have to wait for too long.
 *
 * TODO: We retry in 10ms intervals now. We could/should probably peek at
 *	the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
 *	CRBs on the FIFO and compute the delay dynamically on each retry.
 *	But that is not really needed until we support NX-GZIP access from
 *	user space. (NX-842 driver waits for CSB and Fast thread-wakeup
 *	doesn't use credit checking).
 */
static void poll_window_credits(struct vas_window *window)
{
	u64 val;
	int creds, mode;

	val = read_hvwc_reg(window, VREG(WINCTL));
	if (window->tx_win)
		mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
	else
		mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);

	if (!mode)
		return;
retry:
	if (window->tx_win) {
		val = read_hvwc_reg(window, VREG(TX_WCRED));
		creds = GET_FIELD(VAS_TX_WCRED, val);
	} else {
		val = read_hvwc_reg(window, VREG(LRX_WCRED));
		creds = GET_FIELD(VAS_LRX_WCRED, val);
	}

	if (creds < window->wcreds_max) {
		val = 0;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(msecs_to_jiffies(10));
		goto retry;
	}
}

/*
 * Wait for the window to go to "not-busy" state. It should only take a
 * short time to queue a CRB, so window should not be busy for too long.
 * Trying 5ms intervals.
 */
static void poll_window_busy_state(struct vas_window *window)
{
	int busy;
	u64 val;

retry:
	val = read_hvwc_reg(window, VREG(WIN_STATUS));
	busy = GET_FIELD(VAS_WIN_BUSY, val);
	if (busy) {
		val = 0;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(msecs_to_jiffies(5));
		goto retry;
	}
}

/*
 * Have the hardware cast a window out of cache and wait for it to
 * be completed.
 *
 * NOTE: It can take a relatively long time to cast the window context
 *	out of the cache. It is not strictly necessary to cast out if:
 *
 *	- we clear the "Pin Window" bit (so hardware is free to evict)
 *
 *	- we re-initialize the window context when it is reassigned.
 *
 *	We do the former in vas_win_close() and latter in vas_win_open().
 *	So, ignoring the cast-out for now. We can add it as needed. If
 *	casting out becomes necessary we should consider offloading the
 *	job to a worker thread, so the window close can proceed quickly.
 */
static void poll_window_castout(struct vas_window *window)
{
	/* stub for now */
}

/*
 * Unpin and close a window so no new requests are accepted and the
 * hardware can evict this window from cache if necessary.
 */
static void unpin_close_window(struct vas_window *window)
{
	u64 val;

	val = read_hvwc_reg(window, VREG(WINCTL));
	val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
	val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
	write_hvwc_reg(window, VREG(WINCTL), val);
}

/*
 * Close a window.
 *
 * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
 *	- Disable new paste operations (unmap paste address)
 *	- Poll for the "Window Busy" bit to be cleared
 *	- Clear the Open/Enable bit for the Window.
 *	- Poll for return of window Credits (implies FIFO empty for Rx win?)
 *	- Unpin and cast window context out of cache
 *
 * Besides the hardware, kernel has some bookkeeping of course.
 */
int vas_win_close(struct vas_window *window)
{
	if (!window)
		return 0;

	if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
		pr_devel("Attempting to close an active Rx window!\n");
		WARN_ON_ONCE(1);
		return -EBUSY;
	}

	unmap_paste_region(window);

	clear_vinst_win(window);

	poll_window_busy_state(window);

	unpin_close_window(window);

	poll_window_credits(window);

	poll_window_castout(window);

	/* if send window, drop reference to matching receive window */
	if (window->tx_win)
		put_rx_win(window->rxwin);

	vas_window_free(window);

	return 0;
}
EXPORT_SYMBOL_GPL(vas_win_close);

/*
 * Return a system-wide unique window id for the window @win.
 */
u32 vas_win_id(struct vas_window *win)
{
	return encode_pswid(win->vinst->vas_id, win->winid);
}
EXPORT_SYMBOL_GPL(vas_win_id);