/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"


/*
 * Per-session Intel PT decoding state.
 *
 * NOTE(review): no closing "};" for this struct is visible in this view of
 * the file - confirm against the complete source before building.
 */
struct intel_pt {
struct auxtrace auxtrace;
/* queue_array[] is indexed by intel_pt_queue.queue_nr in intel_pt_get_trace() */
struct auxtrace_queues queues;
struct auxtrace_heap heap;
u32 auxtrace_type;
/* Source of the data file descriptor and of the evlist that is scanned */
struct perf_session *session;
/* Used for thread lookup and for dso data reads / cache sizing */
struct machine *machine;
struct perf_evsel *switch_evsel;
/* Fallback thread for kernel addresses when a queue has no thread */
struct thread *unknown_thread;
bool timeless_decoding;
/* Consulted by intel_pt_get_trace() when deciding buffer continuity */
bool sampling_mode;
/* Enables the overlap fix-up between buffers in intel_pt_get_trace() */
bool snapshot_mode;
bool per_cpu_mmaps;
/* Passed through to intel_pt_find_overlap() */
bool have_tsc;
bool data_queued;
bool est_tsc;
bool sync_switch;
bool mispred_all;
int have_sched_switch;
/* Event attributes whose type equals this value are Intel PT events */
u32 pmu_type;
/* Addresses at or above this value are treated as kernel addresses */
u64 kernel_start;
u64 switch_ip;
u64 ptss_ip;

struct perf_tsc_conversion tc;
bool cap_user_time_zero;

struct itrace_synth_opts synth_opts;

bool sample_instructions;
u64 instructions_sample_type;
u64 instructions_sample_period;
u64 instructions_id;

bool sample_branches;
u32 branches_filter;
u64 branches_sample_type;
u64 branches_id;

bool sample_transactions;
u64 transactions_sample_type;
u64 transactions_id;

bool synth_needs_swap;

u64 tsc_bit;
u64 mtc_bit;
/* Bit mask within the event config; zero means "not available" */
u64 mtc_freq_bits;
u32 tsc_ctc_ratio_n;
u32 tsc_ctc_ratio_d;
u64 cyc_bit;
/* Config bit mask tested by intel_pt_return_compression(); zero if absent */
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;

unsigned long num_events;

char *filter;
/* List of address filters walked by intel_pt_match_pgd_ip() */
struct addr_filters filts;

enum switch_state {

/*
 * Per-queue decoding state; passed around as the opaque 'data' pointer of the
 * decoder callbacks in this file.
 *
 * NOTE(review): no closing "};" for this struct is visible in this view of
 * the file - confirm against the complete source before building.
 */
struct intel_pt_queue {
/* Owning Intel PT session state */
struct intel_pt *pt;
/* Index of this queue in pt->queues.queue_array[] */
unsigned int queue_nr;
/* Buffer most recently handed out by intel_pt_get_trace() */
struct auxtrace_buffer *buffer;
void *decoder;
const struct intel_pt_state *state;
struct ip_callchain *chain;
struct branch_stack *last_branch;
struct branch_stack *last_branch_rb;
size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
/* When set, intel_pt_get_trace() reports no more data */
bool stop;
/* When set, intel_pt_get_trace() sets 'stop' after each buffer */
bool step_through_buffers;
/* When set, pid/tid/thread follow the values recorded in each buffer */
bool use_buffer_pid_tid;

pid_t pid, tid;
int cpu;
int switch_state;
pid_t next_tid;
/* May be NULL; see intel_pt_use_buffer_pid_tid() */
struct thread *thread;
bool exclude_kernel;
bool have_sample;
u64 time;
u64 timestamp;
u32 flags;
u16 insn_len;
u64 last_insn_cnt;
char insn[INTEL_PT_INSN_BUF_SZ];

/*
 * Print a hex/packet listing of raw Intel PT trace data to stdout.
 *
 * @pt:  unused
 * @buf: start of the trace bytes
 * @len: number of bytes at @buf
 *
 * One line is printed per packet: the byte offset, the packet bytes in hex
 * padded out to a 16-byte-wide column, then the packet description. Bytes
 * that cannot be decoded are consumed one at a time and reported as
 * "Bad packet!".
 */
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet);
		/* Undecodable data: skip a single byte and try again */
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;

		printf(".");
		/* pos is a size_t, so it needs the 'z' length modifier */
		color_fprintf(stdout, color, " %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		/*
		 * Each byte above occupies three characters, so pad absent
		 * bytes with three spaces to keep the descriptions aligned.
		 */
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");

		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}

		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}


/*
 * Dump one auxtrace event's trace data: emit a "." separator line and then
 * the packet listing produced by intel_pt_dump().
 */
static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	fputs(".\n", stdout);
	intel_pt_dump(pt, buf, len);
}


/*
 * Restrict buffer @b to the part that intel_pt_find_overlap() reports as its
 * starting point relative to the preceding buffer @a.
 *
 * On success b->use_data / b->use_size describe the region from that point to
 * the end of @b and 0 is returned. Returns -EINVAL when no starting point is
 * found.
 */
static int intel_pt_do_fix_overlap(struct intel_pt *pt,
				   struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	void *fresh = intel_pt_find_overlap(a->data, a->size, b->data,
					    b->size, pt->have_tsc);

	if (!fresh)
		return -EINVAL;

	b->use_data = fresh;
	b->use_size = b->data + b->size - fresh;

	return 0;
}


/*
 * Adopt the cpu/pid/tid recorded in @buffer as the queue's current context.
 *
 * The cpu is only taken from the buffer when the queue itself has none (-1)
 * and the buffer has one. The queue's thread reference is always dropped and
 * then looked up again if the new tid is known: with a known pid the thread
 * is found or created, otherwise it is only looked up.
 */
static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
					struct auxtrace_queue *queue,
					struct auxtrace_buffer *buffer)
{
	struct machine *machine;

	if (queue->cpu == -1 && buffer->cpu != -1)
		ptq->cpu = buffer->cpu;

	ptq->pid = buffer->pid;
	ptq->tid = buffer->tid;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid == -1)
		return;

	machine = ptq->pt->machine;
	if (ptq->pid == -1)
		ptq->thread = machine__find_thread(machine, -1, ptq->tid);
	else
		ptq->thread = machine__findnew_thread(machine, ptq->pid,
						      ptq->tid);
}


/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) { struct intel_pt_queue *ptq = data; struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; struct auxtrace_queue *queue; if (ptq->stop) { b->len = 0; return 0; } queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; next: buffer = auxtrace_buffer__next(queue, buffer); if (!buffer) { if (old_buffer) auxtrace_buffer__drop_data(old_buffer); b->len = 0; return 0; } ptq->buffer = buffer; if (!buffer->data) { int fd = perf_data_file__fd(ptq->pt->session->file); buffer->data = auxtrace_buffer__get_data(buffer, fd); if (!buffer->data) return -ENOMEM; } if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) return -ENOMEM; if (buffer->use_data) { b->len = buffer->use_size; b->buf = buffer->use_data; } else { b->len = buffer->size; b->buf = buffer->data; } b->ref_timestamp = buffer->reference; /* * If in snapshot mode and the buffer has no usable data, get next * buffer and again check overlap against old_buffer. */ if (ptq->pt->snapshot_mode && !b->len) goto next; if (old_buffer) auxtrace_buffer__drop_data(old_buffer); if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && !buffer->consecutive)) { b->consecutive = false; b->trace_nr = buffer->buffer_nr + 1; } else { b->consecutive = true; } if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || ptq->tid != buffer->tid)) intel_pt_use_buffer_pid_tid(ptq, queue, buffer); if (ptq->step_through_buffers) ptq->stop = true; if (!b->len) return intel_pt_get_trace(b, data); return 0; }


/*
 * One cached result of walking instructions from a dso offset; filled in by
 * intel_pt_cache_add() and read back in intel_pt_walk_next_insn().
 */
struct intel_pt_cache_entry {
	/* Generic cache linkage handed to auxtrace_cache__add() */
	struct auxtrace_cache_entry	entry;
	/* Number of instructions covered by this entry */
	u64				insn_cnt;
	/* Number of bytes the instruction pointer advanced */
	u64				byte_cnt;
	/* Fields below are copied from the final struct intel_pt_insn */
	enum intel_pt_insn_op		op;
	enum intel_pt_insn_branch	branch;
	int				length;
	int32_t				rel;
	char				insn[INTEL_PT_INSN_BUF_SZ];
};
/*
 * Config callback that picks up the "intel-pt.cache-divisor" setting.
 *
 * @var:   name of the config variable being visited
 * @value: its textual value (decimal, octal or hex as accepted by strtol
 *         with base 0)
 * @data:  points to the int that receives the divisor
 *
 * The target is only written when the value parses to a number in the range
 * 1..INT_MAX; anything else leaves it untouched. Always returns 0 so that an
 * unusable setting never aborts config processing.
 */
static int intel_pt_config_div(const char *var, const char *value, void *data)
{
	int *d = data;
	long val;

	if (strcmp(var, "intel-pt.cache-divisor"))
		return 0;

	/*
	 * strtol() must not be handed a NULL string.
	 * NOTE(review): presumably a key given without a value arrives here
	 * as NULL - confirm against the config parser.
	 */
	if (!value)
		return 0;

	errno = 0;
	val = strtol(value, NULL, 0);
	/* An out-of-range parse is clamped by strtol(); do not trust it */
	if (errno == ERANGE)
		return 0;

	if (val > 0 && val <= INT_MAX)
		*d = val;

	return 0;
}


/*
 * Return the divisor applied to a dso's data size when sizing its cache.
 *
 * The value is read from the perf config the first time it is needed and
 * remembered in a function-local static; 64 is used when nothing is
 * configured.
 */
static int intel_pt_cache_divisor(void)
{
	static int d;

	if (!d) {
		perf_config(intel_pt_config_div, &d);
		if (!d)
			d = 64;
	}

	return d;
}


/*
 * Choose the cache size, expressed as a number of bits, for @dso.
 *
 * The dso data size is divided by the configured divisor; the result is
 * clamped so that the returned bit count lies between 10 and 21.
 */
static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
{
	off_t scaled;

	scaled = dso__data_size(dso, machine);
	scaled /= intel_pt_cache_divisor();

	if (scaled < 1000)
		return 10;

	if (scaled > (1 << 21))
		return 21;

	/* Position of the highest set bit, counted from 1 */
	return 32 - __builtin_clz(scaled);
}


/*
 * Return the instruction cache attached to @dso, creating it on first use.
 *
 * May return NULL: a failed creation is stored as-is and callers are expected
 * to cope without a cache.
 */
static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
{
	if (!dso->auxtrace_cache) {
		unsigned int bits = intel_pt_cache_size(dso, machine);

		/* Ignoring cache creation failure */
		dso->auxtrace_cache =
			auxtrace_cache__new(bits,
					    sizeof(struct intel_pt_cache_entry),
					    200);
	}

	return dso->auxtrace_cache;
}


/*
 * Record the outcome of an instruction walk in the cache of @dso.
 *
 * @offset:        dso offset the walk started from (the cache key)
 * @insn_cnt:      number of instructions walked
 * @byte_cnt:      number of bytes the instruction pointer advanced
 * @intel_pt_insn: the last decoded instruction, copied into the entry
 *
 * Returns 0 on success, -ENOMEM when there is no cache or no entry can be
 * allocated, or the error from auxtrace_cache__add() (in which case the
 * freshly allocated entry is released again).
 */
static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
{
	struct auxtrace_cache *cache = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *item;
	int ret;

	if (!cache)
		return -ENOMEM;

	item = auxtrace_cache__alloc_entry(cache);
	if (!item)
		return -ENOMEM;

	item->insn_cnt = insn_cnt;
	item->byte_cnt = byte_cnt;
	item->op = intel_pt_insn->op;
	item->branch = intel_pt_insn->branch;
	item->length = intel_pt_insn->length;
	item->rel = intel_pt_insn->rel;
	memcpy(item->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

	ret = auxtrace_cache__add(cache, offset, &item->entry);
	if (ret)
		auxtrace_cache__free_entry(cache, item);

	return ret;
}


/*
 * Look up the cached walk result for @offset within @dso.
 *
 * Returns NULL when the dso has no cache (creation failed); otherwise
 * whatever auxtrace_cache__lookup() returns for @offset.
 */
static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
	struct auxtrace_cache *cache = intel_pt_cache(dso, machine);

	return cache ? auxtrace_cache__lookup(dso->auxtrace_cache, offset)
		     : NULL;
}


/* (stray contributor-statistics text from extraction: "adrian hunter", 100.00%) */

static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data) { struct intel_pt_queue *ptq = data; struct machine *machine = ptq->pt->machine; struct thread *thread; struct addr_location al; unsigned char buf[INTEL_PT_INSN_BUF_SZ]; ssize_t len; int x86_64; u8 cpumode; u64 offset, start_offset, start_ip; u64 insn_cnt = 0; bool one_map = true; intel_pt_insn->length = 0; if (to_ip && *ip == to_ip) goto out_no_cache; if (*ip >= ptq->pt->kernel_start) cpumode = PERF_RECORD_MISC_KERNEL; else cpumode = PERF_RECORD_MISC_USER; thread = ptq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) return -EINVAL; thread = ptq->pt->unknown_thread; } while (1) { thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); if (! || !>dso) return -EINVAL; if (>dso->data.status == DSO_DATA_STATUS_ERROR && dso__data_status_seen(>dso, DSO_DATA_STATUS_SEEN_ITRACE)) return -ENOENT; offset =>map_ip(, *ip); if (!to_ip && one_map) { struct intel_pt_cache_entry *e; e = intel_pt_cache_lookup(>dso, machine, offset); if (e && (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { *insn_cnt_ptr = e->insn_cnt; *ip += e->byte_cnt; intel_pt_insn->op = e->op; intel_pt_insn->branch = e->branch; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ); intel_pt_log_insn_no_data(intel_pt_insn, *ip); return 0; } } start_offset = offset; start_ip = *ip; /* Load maps to ensure dso->is_64_bit has been updated */ map__load(; x86_64 =>dso->is_64_bit; while (1) { len = dso__data_read_offset(>dso, machine, offset, buf, INTEL_PT_INSN_BUF_SZ); if (len <= 0) return -EINVAL; if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) return -EINVAL; intel_pt_log_insn(intel_pt_insn, *ip); insn_cnt += 1; if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) goto out; if (max_insn_cnt && insn_cnt >= max_insn_cnt) goto out_no_cache; *ip += 
intel_pt_insn->length; if (to_ip && *ip == to_ip) goto out_no_cache; if (*ip >=>end) break; offset += intel_pt_insn->length; } one_map = false; } out: *insn_cnt_ptr = insn_cnt; if (!one_map) goto out_no_cache; /* * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate * entries. */ if (to_ip) { struct intel_pt_cache_entry *e; e = intel_pt_cache_lookup(>dso, machine, start_offset); if (e) return 0; } /* Ignore cache errors */ intel_pt_cache_add(>dso, machine, start_offset, insn_cnt, *ip - start_ip, intel_pt_insn); return 0; out_no_cache: *insn_cnt_ptr = insn_cnt; return 0; }


/*
 * Decide whether an address at which tracing stopped is explained by the
 * configured address filters.
 *
 * @pt:       session state holding the filter list
 * @ip:       the address itself (used for logging only)
 * @offset:   the value compared against each filter's [addr, addr + size)
 * @filename: object file the address belongs to, or NULL for the kernel
 *
 * Returns true when the address lies in a trace-stop region, or when at least
 * one start filter exists and the address lies in none of them.
 */
static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
				  uint64_t offset, const char *filename)
{
	struct addr_filter *filt;
	bool any_start_filter = false;
	bool in_stop_region = false;
	bool in_start_filter = false;

	list_for_each_entry(filt, &pt->filts.head, list) {
		if (filt->start)
			any_start_filter = true;

		/* Both sides must name a file, or both must be the kernel */
		if (!filename != !filt->filename)
			continue;
		if (filename && strcmp(filename, filt->filename))
			continue;

		if (offset < filt->addr || offset >= filt->addr + filt->size)
			continue;

		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
			     ip, offset, filename ? filename : "[kernel]",
			     filt->start ? "filter" : "stop",
			     filt->addr, filt->size);

		if (filt->start)
			in_start_filter = true;
		else
			in_stop_region = true;
	}

	if (!in_stop_region && !in_start_filter)
		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
			     ip, offset, filename ? filename : "[kernel]");

	if (in_stop_region)
		return true;

	return any_start_filter && !in_start_filter;
}


/*
 * Check an address against the address filters, resolving user-space
 * addresses to an object file and offset first.
 *
 * @ip:   address to check
 * @data: the struct intel_pt_queue being decoded
 *
 * Kernel addresses are matched directly with no filename. Returns the result
 * of intel_pt_match_pgd_ip() (0 or 1), or -EINVAL when a user-space address
 * cannot be resolved because the queue has no thread or the address has no
 * map/dso.
 */
static int __intel_pt_pgd_ip(uint64_t ip, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct thread *thread;
	struct addr_location al;
	u8 cpumode;
	u64 offset;

	if (ip >= ptq->pt->kernel_start)
		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);

	cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;
	if (!thread)
		return -EINVAL;

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
	if (!al.map || !al.map->dso)
		return -EINVAL;

	offset = al.map->map_ip(al.map, ip);

	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
				     al.map->dso->long_name);
}


/*
 * Boolean wrapper around __intel_pt_pgd_ip(): only a positive result counts
 * as a match, so lookup errors (negative values) are reported as "no match".
 */
static bool intel_pt_pgd_ip(uint64_t ip, void *data)
{
	int res = __intel_pt_pgd_ip(ip, data);

	return res > 0;
}


/*
 * Test whether @attr describes an Intel PT event (its type equals the
 * session's PMU type) and, if so, optionally hand back its config word.
 *
 * @config may be NULL when the caller only needs the yes/no answer.
 */
static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
{
	if (attr->type != pt->pmu_type)
		return false;

	if (config)
		*config = attr->config;

	return true;
}


/*
 * Return true only if every Intel PT event in the session has exclude_kernel
 * set (which is also the answer when there are no Intel PT events).
 */
static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!intel_pt_get_config(pt, &evsel->attr, NULL))
			continue;
		if (!evsel->attr.exclude_kernel)
			return false;
	}

	return true;
}


/*
 * Return false if any Intel PT event in the session has the "noretcomp"
 * config bit set; true otherwise, including when no such bit is known
 * (noretcomp_bit == 0).
 */
static bool intel_pt_return_compression(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	u64 config;

	if (!pt->noretcomp_bit)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!intel_pt_get_config(pt, &evsel->attr, &config))
			continue;
		if (config & pt->noretcomp_bit)
			return false;
	}

	return true;
}


static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct perf_evsel *evsel; unsigned int shift; u64 config; if (!pt->mtc_freq_bits) return 0; for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) config >>= 1; evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, &config)