cregit-Linux: how code gets into the kernel

Release 4.15: kernel/trace/blktrace.c

Directory: kernel/trace
/*
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/time.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/blk-cgroup.h>

#include "../../block/blk.h"

#include <trace/events/block.h>

#include "trace_output.h"

#ifdef CONFIG_BLK_DEV_IO_TRACE


static unsigned int blktrace_seq __read_mostly = 1;


static struct trace_array *blk_tr;

static bool blk_tracer_enabled __read_mostly;

static LIST_HEAD(running_trace_list);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);

/* Select an alternative, minimalistic output rather than the original one */

#define TRACE_BLK_OPT_CLASSIC	0x1

#define TRACE_BLK_OPT_CGROUP	0x2

#define TRACE_BLK_OPT_CGNAME	0x4


static struct tracer_opt blk_tracer_opts[] = {
	/* Default disable the minimalistic output */
	{ TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
#ifdef CONFIG_BLK_CGROUP
	{ TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) },
	{ TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) },
#endif
	{ }
};


static struct tracer_flags blk_tracer_flags = {
	.val  = 0,
	.opts = blk_tracer_opts,
};

/* Global reference count of probes */
static DEFINE_MUTEX(blk_probe_mutex);

static int blk_probes_ref;

static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);

/*
 * Send out a notify message.
 */

static void trace_note(struct blk_trace *bt, pid_t pid, int action,
                       const void *data, size_t len,
                       union kernfs_node_id *cgid)
{
        struct blk_io_trace *t;
        struct ring_buffer_event *event = NULL;
        struct ring_buffer *buffer = NULL;
        int pc = 0;
        int cpu = smp_processor_id();
        bool blk_tracer = blk_tracer_enabled;
        ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;

        if (blk_tracer) {
                buffer = blk_tr->trace_buffer.buffer;
                pc = preempt_count();
                event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
                                                  sizeof(*t) + len + cgid_len,
                                                  0, pc);
                if (!event)
                        return;
                t = ring_buffer_event_data(event);
                goto record_it;
        }

        if (!bt->rchan)
                return;

        t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
        if (t) {
                t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
                t->time = ktime_to_ns(ktime_get());
record_it:
                t->device = bt->dev;
                t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
                t->pid = pid;
                t->cpu = cpu;
                t->pdu_len = len + cgid_len;
                if (cgid)
                        memcpy((void *)t + sizeof(*t), cgid, cgid_len);
                memcpy((void *) t + sizeof(*t) + cgid_len, data, len);

                if (blk_tracer)
                        trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
        }
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  92      31.51%   3        25.00%
Li Zefan                    86      29.45%   1        8.33%
Shaohua Li                  57      19.52%   1        8.33%
Olaf Kirch                  26      8.90%    1        8.33%
Steven Rostedt              19      6.51%    3        25.00%
Arnaldo Carvalho de Melo    8       2.74%    1        8.33%
Ingo Molnar                 4       1.37%    2        16.67%
Total                       292     100.00%  12       100.00%

/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct task_struct *tsk)
{
        unsigned long flags;
        struct blk_trace *bt;

        tsk->btrace_seq = blktrace_seq;
        spin_lock_irqsave(&running_trace_lock, flags);
        list_for_each_entry(bt, &running_trace_list, running_list) {
                trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
                           sizeof(tsk->comm), NULL);
        }
        spin_unlock_irqrestore(&running_trace_lock, flags);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jan Kara                    35      46.05%   1        20.00%
Olaf Kirch                  31      40.79%   1        20.00%
Jens Axboe                  8       10.53%   2        40.00%
Shaohua Li                  2       2.63%    1        20.00%
Total                       76      100.00%  5        100.00%


static void trace_note_time(struct blk_trace *bt)
{
        struct timespec64 now;
        unsigned long flags;
        u32 words[2];

        /* need to check user space to see if this breaks in y2038 or y2106 */
        ktime_get_real_ts64(&now);
        words[0] = (u32)now.tv_sec;
        words[1] = now.tv_nsec;

        local_irq_save(flags);
        trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL);
        local_irq_restore(flags);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Olaf Kirch                  72      88.89%   1        25.00%
Arnd Bergmann               6       7.41%    1        25.00%
Shaohua Li                  2       2.47%    1        25.00%
Jens Axboe                  1       1.23%    1        25.00%
Total                       81      100.00%  4        100.00%


void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
        const char *fmt, ...)
{
        int n;
        va_list args;
        unsigned long flags;
        char *buf;

        if (unlikely(bt->trace_state != Blktrace_running &&
                     !blk_tracer_enabled))
                return;

        /*
         * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
         * message to the trace.
         */
        if (!(bt->act_mask & BLK_TC_NOTIFY))
                return;

        local_irq_save(flags);
        buf = this_cpu_ptr(bt->msg_data);
        va_start(args, fmt);
        n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
        va_end(args);

        if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
                blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
        trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
                blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL);
#else
        trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL);
#endif
        local_irq_restore(flags);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Alan D. Brunelle            61      35.67%   1        11.11%
Shaohua Li                  55      32.16%   2        22.22%
Jens Axboe                  14      8.19%    1        11.11%
Tao Ma                      13      7.60%    1        11.11%
Carl Henrik Lunde           12      7.02%    1        11.11%
Li Zefan                    8       4.68%    1        11.11%
Arnaldo Carvalho de Melo    7       4.09%    1        11.11%
Shan Wei                    1       0.58%    1        11.11%
Total                       171     100.00%  9        100.00%

EXPORT_SYMBOL_GPL(__trace_note_message);
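
Usage note (not part of this file): kernel code does not normally call __trace_note_message() directly; the blk_add_trace_msg() wrapper in <linux/blktrace_api.h> looks up q->blk_trace and forwards to it, so it is a no-op when tracing is not running. A minimal sketch with a hypothetical driver helper name:

#include <linux/blkdev.h>
#include <linux/blktrace_api.h>

/* Sketch only: drop a free-form annotation into an active blktrace stream. */
static void my_driver_annotate(struct request_queue *q, int error)
{
        blk_add_trace_msg(q, "my_driver: retrying request, error %d", error);
}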
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
                         pid_t pid)
{
        if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
                return 1;
        if (sector && (sector < bt->start_lba || sector > bt->end_lba))
                return 1;
        if (bt->pid && pid != bt->pid)
                return 1;

        return 0;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  74      94.87%   1        50.00%
Shawn Du                    4       5.13%    1        50.00%
Total                       78      100.00%  2        100.00%

/*
 * Data direction bit lookup
 */
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
                                 BLK_TC_ACT(BLK_TC_WRITE) };

#define BLK_TC_RAHEAD           BLK_TC_AHEAD
#define BLK_TC_PREFLUSH         BLK_TC_FLUSH

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
          (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
                     int op, int op_flags, u32 what, int error, int pdu_len,
                     void *pdu_data, union kernfs_node_id *cgid)
{
        struct task_struct *tsk = current;
        struct ring_buffer_event *event = NULL;
        struct ring_buffer *buffer = NULL;
        struct blk_io_trace *t;
        unsigned long flags = 0;
        unsigned long *sequence;
        pid_t pid;
        int cpu, pc = 0;
        bool blk_tracer = blk_tracer_enabled;
        ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;

        if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
                return;

        what |= ddir_act[op_is_write(op) ? WRITE : READ];
        what |= MASK_TC_BIT(op_flags, SYNC);
        what |= MASK_TC_BIT(op_flags, RAHEAD);
        what |= MASK_TC_BIT(op_flags, META);
        what |= MASK_TC_BIT(op_flags, PREFLUSH);
        what |= MASK_TC_BIT(op_flags, FUA);
        if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
                what |= BLK_TC_ACT(BLK_TC_DISCARD);
        if (op == REQ_OP_FLUSH)
                what |= BLK_TC_ACT(BLK_TC_FLUSH);
        if (cgid)
                what |= __BLK_TA_CGROUP;

        pid = tsk->pid;
        if (act_log_check(bt, what, sector, pid))
                return;
        cpu = raw_smp_processor_id();

        if (blk_tracer) {
                tracing_record_cmdline(current);

                buffer = blk_tr->trace_buffer.buffer;
                pc = preempt_count();
                event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
                                                  sizeof(*t) + pdu_len + cgid_len,
                                                  0, pc);
                if (!event)
                        return;
                t = ring_buffer_event_data(event);
                goto record_it;
        }

        if (unlikely(tsk->btrace_seq != blktrace_seq))
                trace_note_tsk(tsk);

        /*
         * A word about the locking here - we disable interrupts to reserve
         * some space in the relay per-cpu buffer, to prevent an irq
         * from coming in and stepping on our toes.
         */
        local_irq_save(flags);
        t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
        if (t) {
                sequence = per_cpu_ptr(bt->sequence, cpu);

                t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
                t->sequence = ++(*sequence);
                t->time = ktime_to_ns(ktime_get());
record_it:
                /*
                 * These two are not needed in ftrace as they are in the
                 * generic trace_entry, filled by tracing_generic_entry_update,
                 * but for the trace_event->bin() synthesizer benefit we do it
                 * here too.
                 */
                t->cpu = cpu;
                t->pid = pid;

                t->sector = sector;
                t->bytes = bytes;
                t->action = what;
                t->device = bt->dev;
                t->error = error;
                t->pdu_len = pdu_len + cgid_len;

                if (cgid_len)
                        memcpy((void *)t + sizeof(*t), cgid, cgid_len);
                if (pdu_len)
                        memcpy((void *)t + sizeof(*t) + cgid_len,
                               pdu_data, pdu_len);

                if (blk_tracer) {
                        trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
                        return;
                }
        }

        local_irq_restore(flags);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  247     47.50%   2        7.69%
Arnaldo Carvalho de Melo    102     19.62%   6        23.08%
Shaohua Li                  57      10.96%   1        3.85%
Michael Christie            39      7.50%    3        11.54%
Steven Rostedt              19      3.65%    3        11.54%
David Woodhouse             14      2.69%    2        7.69%
Namhyung Kim                12      2.31%    1        3.85%
Li Zefan                    9       1.73%    2        7.69%
Jan Kara                    6       1.15%    1        3.85%
Nathan Scott                5       0.96%    1        3.85%
Adrian Hunter               4       0.77%    1        3.85%
Ingo Molnar                 4       0.77%    2        7.69%
Christoph Hellwig           2       0.38%    1        3.85%
Total                       520     100.00%  26       100.00%


static void blk_trace_free(struct blk_trace *bt)
{
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
        relay_close(bt->rchan);
        debugfs_remove(bt->dir);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
        kfree(bt);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  40      68.97%   2        33.33%
Alan D. Brunelle            11      18.97%   2        33.33%
Stefan Raspl                5       8.62%    1        16.67%
Li Zefan                    2       3.45%    1        16.67%
Total                       58      100.00%  6        100.00%


static void get_probe_ref(void)
{
        mutex_lock(&blk_probe_mutex);
        if (++blk_probes_ref == 1)
                blk_register_tracepoints();
        mutex_unlock(&blk_probe_mutex);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  30      100.00%  1        100.00%
Total                       30      100.00%  1        100.00%


static void put_probe_ref(void)
{
        mutex_lock(&blk_probe_mutex);
        if (!--blk_probes_ref)
                blk_unregister_tracepoints();
        mutex_unlock(&blk_probe_mutex);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  29      100.00%  1        100.00%
Total                       29      100.00%  1        100.00%


static void blk_trace_cleanup(struct blk_trace *bt)
{
        blk_trace_free(bt);
        put_probe_ref();
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Li Zefan                    15      78.95%   1        25.00%
Arnaldo Carvalho de Melo    2       10.53%   1        25.00%
Jens Axboe                  2       10.53%   2        50.00%
Total                       19      100.00%  4        100.00%


static int __blk_trace_remove(struct request_queue *q)
{
        struct blk_trace *bt;

        bt = xchg(&q->blk_trace, NULL);
        if (!bt)
                return -EINVAL;

        if (bt->trace_state != Blktrace_running)
                blk_trace_cleanup(bt);

        return 0;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  51      96.23%   3        75.00%
Li Zefan                    2       3.77%    1        25.00%
Total                       53      100.00%  4        100.00%


int blk_trace_remove(struct request_queue *q)
{
        int ret;

        mutex_lock(&q->blk_trace_mutex);
        ret = __blk_trace_remove(q);
        mutex_unlock(&q->blk_trace_mutex);

        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  39      100.00%  1        100.00%
Total                       39      100.00%  1        100.00%

EXPORT_SYMBOL_GPL(blk_trace_remove);
static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
                                size_t count, loff_t *ppos)
{
        struct blk_trace *bt = filp->private_data;
        char buf[16];

        snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));

        return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  75      100.00%  1        100.00%
Total                       75      100.00%  1        100.00%

static const struct file_operations blk_dropped_fops = {
        .owner =        THIS_MODULE,
        .open =         simple_open,
        .read =         blk_dropped_read,
        .llseek =       default_llseek,
};
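
Usage note (not part of this file): the "dropped" debugfs file served by blk_dropped_read() can be polled from user space to see how many events were lost to full subbuffers. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and a trace is running on sda:

#include <stdio.h>

/* Sketch only: print the dropped-event counter for a traced disk. */
int main(void)
{
        char buf[16];
        FILE *f = fopen("/sys/kernel/debug/block/sda/dropped", "r");

        if (!f)
                return 1;
        if (fgets(buf, sizeof(buf), f))
                printf("dropped events: %s", buf);
        fclose(f);
        return 0;
}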
static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
                                size_t count, loff_t *ppos)
{
        char *msg;
        struct blk_trace *bt;

        if (count >= BLK_TN_MAX_MSG)
                return -EINVAL;

        msg = memdup_user_nul(buffer, count);
        if (IS_ERR(msg))
                return PTR_ERR(msg);

        bt = filp->private_data;
        __trace_note_message(bt, NULL, "%s", msg);
        kfree(msg);

        return count;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Alan D. Brunelle            81      90.00%   1        25.00%
Al Viro                     6       6.67%    1        25.00%
Shaohua Li                  2       2.22%    1        25.00%
Li Zefan                    1       1.11%    1        25.00%
Total                       90      100.00%  4        100.00%

static const struct file_operations blk_msg_fops = {
        .owner =        THIS_MODULE,
        .open =         simple_open,
        .write =        blk_msg_write,
        .llseek =       noop_llseek,
};

/*
 * Keep track of how many times we encountered a full subbuffer, to aid
 * the user space app in telling how many lost events there were.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
                                     void *prev_subbuf, size_t prev_padding)
{
        struct blk_trace *bt;

        if (!relay_buf_full(buf))
                return 1;

        bt = buf->chan->private_data;
        atomic_inc(&bt->dropped);
        return 0;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  57      100.00%  1        100.00%
Total                       57      100.00%  1        100.00%


static int blk_remove_buf_file_callback(struct dentry *dentry)
{
        debugfs_remove(dentry);
        return 0;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  19      100.00%  1        100.00%
Total                       19      100.00%  1        100.00%


static struct dentry *blk_create_buf_file_callback(const char *filename,
                                                   struct dentry *parent,
                                                   umode_t mode,
                                                   struct rchan_buf *buf,
                                                   int *is_global)
{
        return debugfs_create_file(filename, mode, parent, buf,
                                        &relay_file_operations);
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  44      97.78%   1        50.00%
Al Viro                     1       2.22%    1        50.00%
Total                       45      100.00%  2        100.00%

static struct rchan_callbacks blk_relay_callbacks = {
        .subbuf_start           = blk_subbuf_start_callback,
        .create_buf_file        = blk_create_buf_file_callback,
        .remove_buf_file        = blk_remove_buf_file_callback,
};
static void blk_trace_setup_lba(struct blk_trace *bt,
                                struct block_device *bdev)
{
        struct hd_struct *part = NULL;

        if (bdev)
                part = bdev->bd_part;

        if (part) {
                bt->start_lba = part->start_sect;
                bt->end_lba = part->start_sect + part->nr_sects;
        } else {
                bt->start_lba = 0;
                bt->end_lba = -1ULL;
        }
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Li Zefan                    75      100.00%  1        100.00%
Total                       75      100.00%  1        100.00%

/*
 * Setup everything required to start tracing
 */
static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                              struct block_device *bdev,
                              struct blk_user_trace_setup *buts)
{
        struct blk_trace *bt = NULL;
        struct dentry *dir = NULL;
        int ret;

        if (!buts->buf_size || !buts->buf_nr)
                return -EINVAL;

        strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
        buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

        /*
         * some device names have larger paths - convert the slashes
         * to underscores for this to work as expected
         */
        strreplace(buts->name, '/', '_');

        bt = kzalloc(sizeof(*bt), GFP_KERNEL);
        if (!bt)
                return -ENOMEM;

        ret = -ENOMEM;
        bt->sequence = alloc_percpu(unsigned long);
        if (!bt->sequence)
                goto err;

        bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
        if (!bt->msg_data)
                goto err;

        ret = -ENOENT;

        if (!blk_debugfs_root)
                goto err;

        dir = debugfs_lookup(buts->name, blk_debugfs_root);
        if (!dir)
                bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
        if (!dir)
                goto err;

        bt->dev = dev;
        atomic_set(&bt->dropped, 0);
        INIT_LIST_HEAD(&bt->running_list);

        ret = -EIO;
        bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
                                               &blk_dropped_fops);
        if (!bt->dropped_file)
                goto err;

        bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
        if (!bt->msg_file)
                goto err;

        bt->rchan = relay_open("trace", dir, buts->buf_size,
                                buts->buf_nr, &blk_relay_callbacks, bt);
        if (!bt->rchan)
                goto err;

        bt->act_mask = buts->act_mask;
        if (!bt->act_mask)
                bt->act_mask = (u16) -1;

        blk_trace_setup_lba(bt, bdev);

        /* overwrite with user settings */
        if (buts->start_lba)
                bt->start_lba = buts->start_lba;
        if (buts->end_lba)
                bt->end_lba = buts->end_lba;

        bt->pid = buts->pid;
        bt->trace_state = Blktrace_setup;

        ret = -EBUSY;
        if (cmpxchg(&q->blk_trace, NULL, bt))
                goto err;

        get_probe_ref();

        ret = 0;
err:
        if (dir && !bt->dir)
                dput(dir);
        if (ret)
                blk_trace_free(bt);
        return ret;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Jens Axboe                  306     65.38%   6        27.27%
Omar Sandoval               42      8.97%    3        13.64%
Alan D. Brunelle            32      6.84%    1        4.55%
Shawn Du                    24      5.13%    1        4.55%
Li Zefan                    20      4.27%    4        18.18%
Arnd Bergmann               13      2.78%    1        4.55%
Jan Kara                    8       1.71%    1        4.55%
Christof Schmitt            7       1.50%    1        4.55%
Rusty Russell               5       1.07%    1        4.55%
Davidlohr Bueso A           5       1.07%    1        4.55%
Rasmus Villemoes            4       0.85%    1        4.55%
Mathieu Desnoyers           2       0.43%    1        4.55%
Total                       468     100.00%  22       100.00%
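
Usage note (not part of this file): do_blk_trace_setup() is normally reached from user space via the BLKTRACESETUP ioctl on the block device (this is what the blktrace utility does), with BLKTRACESTART, BLKTRACESTOP and BLKTRACETEARDOWN driving the rest of the lifecycle. A minimal user-space sketch, assuming /dev/sda, a mounted debugfs, and arbitrary buffer sizes:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>            /* BLKTRACESETUP, BLKTRACESTART, ... */
#include <linux/blktrace_api.h>  /* struct blk_user_trace_setup */

/* Sketch only: start tracing /dev/sda, then stop and tear down. */
int main(void)
{
        struct blk_user_trace_setup buts;
        int fd = open("/dev/sda", O_RDONLY | O_NONBLOCK);

        if (fd < 0)
                return 1;

        memset(&buts, 0, sizeof(buts));
        buts.buf_size = 512 * 1024;  /* per-CPU relay subbuffer size */
        buts.buf_nr = 4;             /* number of subbuffers */
        buts.act_mask = 0;           /* 0: the kernel defaults to tracing everything */

        if (ioctl(fd, BLKTRACESETUP, &buts) < 0)  /* ends up in blk_trace_setup() */
                return 1;
        ioctl(fd, BLKTRACESTART);

        /* ... consume /sys/kernel/debug/block/sda/trace<cpu> here ... */
        sleep(1);

        ioctl(fd, BLKTRACESTOP);
        ioctl(fd, BLKTRACETEARDOWN);              /* ends up in blk_trace_remove() */
        close(fd);
        return 0;
}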


static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                             struct block_device *bdev,
                             char __user *arg)
{
        struct blk_user_trace_setup buts;
        int ret;

        ret = copy_from_user(&buts, arg, sizeof(buts));
        if (ret)
                return -EFAULT;

        ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
        if (ret)
                return ret;

        if (copy_to_user(arg, &buts, sizeof(buts))) {
                __blk_trace_remove(q);
                return -EFAULT;
        }

        return 0;
}

Contributors

Person                      Tokens  Prop     Commits  CommitProp
Arnd Bergmann               86      78.18%   1        16.67%
Christof Schmitt            8       7.27%    1        16.67%
Shawn Du                    7       6.36%    1        16.67%
Dmitriy Monakhov            6       5.45%    1        16.67%
Jens Axboe                  3       2.73%    2        33.33%
Total                       110     100.00%  6        100.00%


int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                    struct block_device *bdev,
                    char __user *arg)
{
        int ret;

        mutex_lock(&q->blk_trace_mutex);
        ret = __blk_trace_setup(q, name, dev, bdev, arg