cregit-Linux how code gets into the kernel

Release 4.14 drivers/ntb/test/ntb_perf.c

Directory: drivers/ntb/test
/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 *   redistributing this file, you may do so under either license.
 *
 *   GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copy
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *   PCIe NTB Perf Linux driver
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>


/* Module identification strings consumed by the MODULE_*() macros below. */
#define DRIVER_NAME		"ntb_perf"

#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"


#define DRIVER_LICENSE		"Dual BSD/GPL"

#define DRIVER_VERSION		"1.0"

#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"


/* Delay (passed through msecs_to_jiffies()) before the link work is retried. */
#define PERF_LINK_DOWN_TIMEOUT	10

/* Magic value exchanged through the VERSION scratchpad during link setup. */
#define PERF_VERSION		0xffff0001

/* Capacity of perf_ctx.pthr_ctx[], i.e. the most threads one run may start. */
#define MAX_THREADS		32

/* Size of each kmalloc'ed source buffer and upper bound for one copy. */
#define MAX_TEST_SIZE		SZ_1M

/* Number of source buffers allocated per thread (pthr_ctx.srcs[]). */
#define MAX_SRCS		32

/* Sleep between attempts to obtain a DMA descriptor in perf_copy(). */
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)

/* Maximum number of descriptor-preparation attempts in perf_copy(). */
#define DMA_RETRIES		20

/* NOTE(review): not referenced in the visible part of this file. */
#define SZ_4G			(1ULL << 32)

/* Upper clamp applied to the seg_order module parameter. */
#define MAX_SEG_ORDER		20 
/* no larger than 1M for kmalloc buffer */

/* Peer index passed to the NTB API calls in this file. */
#define PIDX			NTB_DEF_PEER_IDX


MODULE_LICENSE(DRIVER_LICENSE);

MODULE_VERSION(DRIVER_VERSION);

MODULE_AUTHOR(DRIVER_AUTHOR);

MODULE_DESCRIPTION(DRIVER_DESCRIPTION);


/* NOTE(review): presumably the driver's debugfs root; not used in the visible code. */
static struct dentry *perf_debugfs_dir;


/* When non-zero, caps the local window size advertised to the peer. */
static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");


/* log2 of the per-copy segment size; the thread caps the result at MAX_TEST_SIZE. */
static unsigned int seg_order = 19; 
/* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");


/* log2 of the total number of bytes each thread transfers per run. */
static unsigned int run_order = 32; 
/* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");


/* Selects the DMA-engine path in perf_copy() instead of memcpy_toio(). */
static bool use_dma; 
/* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");


/* When set, buffers and DMA channels are taken from the NTB device's NUMA node. */
static bool on_node = true; 
/* default to 1 */
module_param(on_node, bool, 0644);
MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");


/*
 * State of the single memory window (index 0) used by the test.
 *
 * The outbound half (phys_addr/phys_size/vbase) is filled in by
 * perf_setup_mw(); the inbound half (xlat_size/buf_size/virt_addr/dma_addr)
 * is managed by perf_set_mw() and perf_free_mw().
 */
struct perf_mw {
	phys_addr_t	phys_addr;	/* from ntb_peer_mw_get_addr() */
	resource_size_t	phys_size;	/* from ntb_peer_mw_get_addr() */
	resource_size_t	xlat_align;	/* from ntb_mw_get_align() */
	resource_size_t	xlat_align_size; /* from ntb_mw_get_align() */
	void __iomem	*vbase;		/* ioremap_wc() of phys_addr/phys_size */
	size_t		xlat_size;	/* size rounded up to xlat_align_size */
	size_t		buf_size;	/* size rounded up to xlat_align */
	void		*virt_addr;	/* dma_alloc_coherent() buffer, or NULL */
	dma_addr_t	dma_addr;	/* bus address of virt_addr */
};

struct perf_ctx;


/* Per-thread state for one measurement kthread. */
struct pthr_ctx {
	struct task_struct	*thread;	/* kthread handle, NULL when idle */
	struct perf_ctx		*perf;		/* owning device context */
	atomic_t		dma_sync;	/* DMA copies submitted, not yet completed */
	struct dma_chan		*dma_chan;	/* channel used when use_dma is set */
	int			dma_prep_err;	/* descriptor preparations that never succeeded */
	int			src_idx;	/* next srcs[] slot to copy from */
	void			*srcs[MAX_SRCS]; /* source buffers, MAX_TEST_SIZE each */
	wait_queue_head_t       *wq;		/* woken when the thread finishes */
	int			status;		/* -ENODATA if unused, else kthread_stop() result */
	u64			copied;		/* bytes copied in the last run */
	u64			diff_us;	/* duration of the last run in microseconds */
};


/* Per-NTB-device context of the performance test. */
struct perf_ctx {
	struct ntb_dev		*ntb;		/* underlying NTB device */
	spinlock_t		db_lock;	/* NOTE(review): unused in the visible code */
	struct perf_mw		mw;		/* the one memory window under test */
	bool			link_is_up;	/* set once link negotiation succeeded */
	struct delayed_work	link_work;	/* runs perf_link_work() */
	wait_queue_head_t	link_wq;	/* woken when link_is_up becomes true */
	u8			perf_threads;	/* number of threads to launch per run */
	/* mutex ensures only one set of threads run at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;		/* threads that reached the start barrier */
	atomic_t                tdone;		/* threads that finished their run */
};

/* Scratchpad register indices used for the link-up handshake. */
enum {
	VERSION = 0,	/* carries PERF_VERSION */
	MW_SZ_HIGH,	/* upper 32 bits of the memory-window size */
	MW_SZ_LOW,	/* lower 32 bits of the memory-window size */
	MAX_SPAD	/* number of scratchpads used */
};


static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); } else { dev_dbg(&perf->ntb->pdev->dev, "link down\n"); if (!perf->link_is_up) cancel_delayed_work_sync(&perf->link_work); perf->link_is_up = false; } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 51 | 60.71% | 1 | 50.00%
Logan Gunthorpe | 33 | 39.29% | 1 | 50.00%
Total | 84 | 100.00% | 2 | 100.00%


static void perf_db_event(void *ctx, int vec) { struct perf_ctx *perf = ctx; u64 db_bits, db_mask; db_mask = ntb_db_vector_mask(perf->ntb, vec); db_bits = ntb_db_read(perf->ntb); dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", vec, db_mask, db_bits); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 63 | 100.00% | 1 | 100.00%
Total | 63 | 100.00% | 1 | 100.00%

/* Callbacks this driver provides to the NTB core. */
static const struct ntb_ctx_ops perf_ops = {
	.link_event	= perf_link_event,
	.db_event	= perf_db_event,
};
/*
 * DMA completion callback: one submitted copy has finished, so drop the
 * thread's count of outstanding transfers.
 */
static void perf_copy_callback(void *data)
{
	struct pthr_ctx *thread_ctx = data;

	atomic_dec(&thread_ctx->dma_sync);
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 25 | 100.00% | 1 | 100.00%
Total | 25 | 100.00% | 1 | 100.00%


/*
 * Copy @size bytes from @src into the memory window at @dst.
 *
 * Without use_dma the copy is a synchronous memcpy_toio(). With use_dma a
 * DMA memcpy is queued on the thread's channel; completion is reported
 * asynchronously through perf_copy_callback().
 *
 * Returns @size on success, 0 when the source could not be mapped or no
 * descriptor could be prepared/submitted, -EINVAL when there is no DMA
 * channel, -ENODEV when the buffers do not meet the engine's alignment
 * requirements and -ENOMEM when no unmap data could be allocated.
 */
static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct perf_mw *mw = &perf->mw;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_async_tx_descriptor *txd = NULL;
	struct dmaengine_unmap_data *unmap;
	struct dma_device *dma_dev;
	size_t src_off, dst_off;
	dma_addr_t dst_phys;
	dma_cookie_t cookie;
	int attempt;

	/* CPU path: plain write into the mapped window. */
	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	dma_dev = chan->device;

	/* Offsets within their pages, used for the alignment check. */
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(dma_dev, src_off, dst_off, size))
		return -ENODEV;

	/* Translate the window's virtual address back to its bus address. */
	dst_phys = mw->phys_addr + ((void __iomem *)dst - mw->vbase);

	unmap = dmaengine_get_unmap_data(dma_dev->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(dma_dev->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev->dev, unmap->addr[0]))
		goto put_unmap;

	unmap->to_cnt = 1;

	/* The engine may be out of descriptors; back off and try again. */
	for (attempt = 0; attempt < DMA_RETRIES; attempt++) {
		txd = dma_dev->device_prep_dma_memcpy(chan, dst_phys,
						      unmap->addr[0], size,
						      DMA_PREP_INTERRUPT);
		if (txd)
			break;

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(DMA_OUT_RESOURCE_TO);
	}

	if (!txd) {
		pctx->dma_prep_err++;
		goto put_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto put_unmap_twice;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

put_unmap_twice:
	/* First put on the submit-failure path; falls through to a second one. */
	dmaengine_unmap_put(unmap);
put_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 412 | 97.40% | 2 | 66.67%
Arnd Bergmann | 11 | 2.60% | 1 | 33.33%
Total | 423 | 100.00% | 3 | 100.00%


/*
 * Push @total bytes through the memory window in @buf_size pieces and
 * record throughput figures in @pctx.
 *
 * @pctx:     calling thread's context; ->copied and ->diff_us are updated
 * @dst:      start of the mapped memory window
 * @src:      source buffer of at least @buf_size bytes
 * @buf_size: bytes per individual copy
 * @win_size: size of the memory window; the destination pointer wraps back
 *            to @dst after win_size / buf_size copies
 * @total:    total number of bytes to transfer
 *
 * Returns 0 on success, -EINVAL when @buf_size is zero or larger than the
 * window, or the negative error reported by perf_copy(). The statistics in
 * @pctx are updated on the perf_copy() error path as well.
 */
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	/* 64-bit counters: small segments in a large run overflow an int. */
	u64 chunks, total_chunks, i;
	u64 copied_chunks = 0;
	u64 copied = 0;
	ssize_t result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;
	int rc = 0;

	if (!buf_size)
		return -EINVAL;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);

	/*
	 * A segment larger than the window would never trigger the wrap
	 * below and the very first copy would write past the mapping.
	 */
	if (!chunks)
		return -EINVAL;

	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		if (result < 0) {
			/* Do not fold an errno into the byte counter. */
			rc = result;
			break;
		}

		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		/* Let the outstanding transfers drain before taking the stop time. */
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	/* A very short run can take less than 1us; avoid dividing by zero. */
	perf = diff_us ? div64_u64(copied, diff_us) : 0;

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return rc;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 255 | 81.99% | 1 | 20.00%
Logan Gunthorpe | 54 | 17.36% | 3 | 60.00%
Arnd Bergmann | 2 | 0.64% | 1 | 20.00%
Total | 311 | 100.00% | 5 | 100.00%


/*
 * Filter for dma_request_channel(). @node carries the wanted NUMA node
 * encoded as a pointer-sized integer. Every channel is accepted when
 * on_node is clear; otherwise only channels whose device sits on that node.
 */
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	int wanted_node = (int)(unsigned long)node;

	return !on_node || dev_to_node(&chan->dev->device) == wanted_node;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 35 | 79.55% | 1 | 50.00%
Gary R Hook | 9 | 20.45% | 1 | 50.00%
Total | 44 | 100.00% | 2 | 100.00%


static int ntb_perf_thread(void *data) { struct pthr_ctx *pctx = data; struct perf_ctx *perf = pctx->perf; struct pci_dev *pdev = perf->ntb->pdev; struct perf_mw *mw = &perf->mw; char __iomem *dst; u64 win_size, buf_size, total; void *src; int rc, node, i; struct dma_chan *dma_chan = NULL; pr_debug("kthread %s starting...\n", current->comm); node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE; if (use_dma && !pctx->dma_chan) { dma_cap_mask_t dma_mask; dma_cap_zero(dma_mask); dma_cap_set(DMA_MEMCPY, dma_mask); dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn, (void *)(unsigned long)node); if (!dma_chan) { pr_warn("%s: cannot acquire DMA channel, quitting\n", current->comm); return -ENODEV; } pctx->dma_chan = dma_chan; } for (i = 0; i < MAX_SRCS; i++) { pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node); if (!pctx->srcs[i]) { rc = -ENOMEM; goto err; } } win_size = mw->phys_size; buf_size = 1ULL << seg_order; total = 1ULL << run_order; if (buf_size > MAX_TEST_SIZE) buf_size = MAX_TEST_SIZE; dst = (char __iomem *)mw->vbase; atomic_inc(&perf->tsync); while (atomic_read(&perf->tsync) != perf->perf_threads) schedule(); src = pctx->srcs[pctx->src_idx]; pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1); rc = perf_move_data(pctx, dst, src, buf_size, win_size, total); atomic_dec(&perf->tsync); if (rc < 0) { pr_err("%s: failed\n", current->comm); rc = -ENXIO; goto err; } for (i = 0; i < MAX_SRCS; i++) { kfree(pctx->srcs[i]); pctx->srcs[i] = NULL; } atomic_inc(&perf->tdone); wake_up(pctx->wq); rc = 0; goto done; err: for (i = 0; i < MAX_SRCS; i++) { kfree(pctx->srcs[i]); pctx->srcs[i] = NULL; } if (dma_chan) { dma_release_channel(dma_chan); pctx->dma_chan = NULL; } done: /* Wait until we are told to stop */ for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; schedule(); } __set_current_state(TASK_RUNNING); return rc; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 449 | 88.74% | 1 | 20.00%
Logan Gunthorpe | 51 | 10.08% | 2 | 40.00%
Gary R Hook | 4 | 0.79% | 1 | 20.00%
Arnd Bergmann | 2 | 0.40% | 1 | 20.00%
Total | 506 | 100.00% | 5 | 100.00%


static void perf_free_mw(struct perf_ctx *perf) { struct perf_mw *mw = &perf->mw; struct pci_dev *pdev = perf->ntb->pdev; if (!mw->virt_addr) return; ntb_mw_clear_trans(perf->ntb, PIDX, 0); dma_free_coherent(&pdev->dev, mw->buf_size, mw->virt_addr, mw->dma_addr); mw->xlat_size = 0; mw->buf_size = 0; mw->virt_addr = NULL; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 87 | 97.75% | 1 | 50.00%
Serge Semin | 2 | 2.25% | 1 | 50.00%
Total | 89 | 100.00% | 2 | 100.00%


/*
 * Allocate the inbound buffer for memory window 0 and program its
 * translation.
 *
 * @perf: device context
 * @size: window size requested by the peer, in bytes
 *
 * The size is rounded up to the window's alignment constraints. If a
 * window with the same translated size is already set up, nothing is done;
 * a window of a different size is freed first.
 *
 * Returns 0 on success, -EINVAL for a zero @size, -ENOMEM when the
 * coherent buffer cannot be allocated and -EIO when the translation cannot
 * be programmed.
 */
static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	int rc;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		/*
		 * Stop here: dma_addr is not valid, so it must not be
		 * programmed into the translation registers.
		 */
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 192 | 98.97% | 2 | 66.67%
Serge Semin | 2 | 1.03% | 1 | 33.33%
Total | 194 | 100.00% | 3 | 100.00%


static void perf_link_work(struct work_struct *work) { struct perf_ctx *perf = container_of(work, struct perf_ctx, link_work.work); struct ntb_dev *ndev = perf->ntb; struct pci_dev *pdev = ndev->pdev; u32 val; u64 size; int rc; dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; if (max_mw_size && size > max_mw_size) size = max_mw_size; ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION); /* now read what peer wrote */ val = ntb_spad_read(ndev, VERSION); if (val != PERF_VERSION) { dev_dbg(&pdev->dev, "Remote version = %#x\n", val); goto out; } val = ntb_spad_read(ndev, MW_SZ_HIGH); size = (u64)val << 32; val = ntb_spad_read(ndev, MW_SZ_LOW); size |= val; dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size); rc = perf_set_mw(perf, size); if (rc) goto out1; perf->link_is_up = true; wake_up(&perf->link_wq); return; out1: perf_free_mw(perf); out: if (ntb_link_is_up(ndev, NULL, NULL) == 1) schedule_delayed_work(&perf->link_work, msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 246 | 90.44% | 1 | 25.00%
Logan Gunthorpe | 20 | 7.35% | 2 | 50.00%
Serge Semin | 6 | 2.21% | 1 | 25.00%
Total | 272 | 100.00% | 4 | 100.00%


static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; int rc; mw = &perf->mw; rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align, &mw->xlat_align_size, NULL); if (rc) return rc; rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size); if (rc) return rc; perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size); if (!mw->vbase) return -ENOMEM; return 0; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 93 | 78.81% | 1 | 50.00%
Serge Semin | 25 | 21.19% | 1 | 50.00%
Total | 118 | 100.00% | 2 | 100.00%


/*
 * debugfs read handler reporting the outcome of the last run.
 *
 * While run_mutex is held the text is just "running". Otherwise one line
 * is produced per thread slot, stopping at the first slot whose status is
 * -ENODATA: either the slot's error code, or bytes copied, elapsed
 * microseconds and the resulting rate.
 *
 * Returns the number of bytes copied to @ubuf, 0 when the file has no
 * context attached, or -ENOMEM when the scratch buffer cannot be
 * allocated.
 */
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	const size_t buf_len = 1024;
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, buf_len - out_off,
					    "%d: error %d\n", i,
					    pctx->status);
			continue;
		}

		/* A run shorter than 1us leaves diff_us at 0; don't divide by it. */
		rate = pctx->diff_us ?
			div64_u64(pctx->copied, pctx->diff_us) : 0;
		out_off += scnprintf(buf + out_off, buf_len - out_off,
			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
			i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

Contributors

Person | Tokens | Prop | Commits | CommitProp
Logan Gunthorpe | 137 | 58.05% | 2 | 40.00%
Dave Jiang | 87 | 36.86% | 1 | 20.00%
Sudip Mukherjee | 9 | 3.81% | 1 | 20.00%
Dan Carpenter | 3 | 1.27% | 1 | 20.00%
Total | 236 | 100.00% | 5 | 100.00%


static void threads_cleanup(struct perf_ctx *perf) { struct pthr_ctx *pctx; int i; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Dave Jiang | 65 | 94.20% | 1 | 50.00%
Logan Gunthorpe | 4 | 5.80% | 1 | 50.00%
Total | 69 | 100.00% | 2 | 100.00%


static void perf_clear_thread_status(struct perf_ctx *perf) { int i; for (i = 0; i < MAX_THREADS; i++) perf->pthr_ctx[i].status = -ENODATA; }

Contributors

Person | Tokens | Prop | Commits | CommitProp
Logan Gunthorpe | 39 | 100.00% | 1 | 100.00%
Total | 39 | 100.00% | 1 | 100.00%


static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct perf_ctx *perf = filp->private_data; int node, i; DECLARE_WAIT_QUEUE_HEAD(wq); if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) return -ENOLINK; if (perf->perf_threads == 0) return -EINVAL; if (!mutex_trylock(&perf->run_mutex)) return -EBUSY; perf_clear_thread_status(perf); if (perf->perf_threads > MAX_THREADS) { perf->perf_threads = MAX_THREADS; pr_info("Reset total threads to: %u\n", MAX_THREADS); } /* no greater than 1M */ if (seg_order > MAX_SEG_ORDER) { seg_order = MAX_SEG_ORDER; pr_info("Fix seg_order to %u\n", seg_order); } if (run_order < seg_order) { run_order = seg_order; pr_info("Fix run_order to %u\n", run_order); } node = on_node ? dev_to_node(&perf->ntb->pdev->dev) : NUMA_NO_NODE; atomic_set(&perf->tdone, 0); /* launch kernel thread */ for (i = 0; i < perf->perf_threads; i++) { struct pthr_ctx *pctx; pctx = &perf->pthr_ctx[i]; atomic_set(&pctx->dma_sync, 0); pctx->perf = perf; pctx->wq = &wq; pctx->thread = kthread_create_on_node(ntb_perf_thread, (void *)pctx, node, "ntb_perf %d", i); if (IS_ERR(pctx->thread)) { pctx->thread = NULL; goto err; } else { wake_up_process(pctx->thread); } } wait_event_interruptible(wq, atomic_read(&perf->tdone) == perf->perf_threads); threads_cleanup(perf); mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); mutex_unlock(&perf->run_mutex); return -