cregit-Linux how code gets into the kernel

Release 4.14 arch/powerpc/oprofile/op_model_cell.c

/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: David Erb (djerb@us.ibm.com)
 * Modifications:
 *         Carl Love <carll@us.ibm.com>
 *         Maynard Johnson <maynardj@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/oprofile.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <asm/cell-pmu.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/oprofile_impl.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/rtas.h>
#include <asm/cell-regs.h>

#include "../platforms/cell/interrupt.h"
#include "cell/pr_util.h"


/*
 * Profiling mode selectors.
 * NOTE(review): presumably the values stored in profiling_mode; the code
 * that assigns profiling_mode is not visible in this part of the file --
 * confirm.
 */
#define PPU_PROFILING            0

#define SPU_PROFILING_CYCLES     1

#define SPU_PROFILING_EVENTS     2


/*
 * NOTE(review): looks like the first and last event number of the SPU
 * event range; the users are not visible in this part of the file --
 * confirm.
 */
#define SPU_EVENT_NUM_START      4100

#define SPU_EVENT_NUM_STOP       4399

/* spu, address trace, decimal */
#define SPU_PROFILE_EVENT_ADDR          4363

/*
 * Unit masks used with SPU_PROFILE_EVENT_ADDR to fill in pm_signal[1] and
 * pm_signal[2]; sub unit set to zero.
 */
#define SPU_PROFILE_EVENT_ADDR_MASK_A   0x146

#define SPU_PROFILE_EVENT_ADDR_MASK_B   0x186


/* number of SPUs profiled per node; also the stride into spu_pm_cnt[] */
#define NUM_SPUS_PER_NODE    8

/* event number for SPU_CYCLES */
#define SPU_CYCLES_EVENT_NUM 2


/* event number for CYCLES */
#define PPU_CYCLES_EVENT_NUM 1

/* special group number for identifying the PPU_CYCLES event */
#define PPU_CYCLES_GRP_NUM   1

/* PPU cycle event specifier, written to the pm07 control word */
#define CBE_COUNT_ALL_CYCLES 0x42800000


/* number of physical threads in physical processor */
#define NUM_THREADS 2

/* number of debug bus words scanned by set_pm_event() */
#define NUM_DEBUG_BUS_WORDS 4

/* number of entries in input_bus[] */
#define NUM_INPUT_BUS_WORDS 2


/* maximum 24 bit LFSR value */
#define MAX_SPU_COUNT 0xFFFFFF

/*
 * Minimum HW interval timer setting to send a value to the trace buffer is
 * 10 cycles.  To configure the counter to send a value every N cycles, set
 * the counter to 2^32 - 1 - N.
 *
 * The replacement list is parenthesized so that the macro always expands to
 * a single operand; without the parentheses an expression such as
 * "~NUM_INTERVAL_CYC" or "2 * NUM_INTERVAL_CYC" would bind to only part of
 * the subtraction.
 */

#define NUM_INTERVAL_CYC  (0xFFFFFFFF - 10)

/*
 * spu_cycle_reset is the number of cycles between samples.
 * This variable is used for SPU profiling and should ONLY be set
 * at the beginning of cell_reg_setup; otherwise, it's read-only.
 */

static unsigned int spu_cycle_reset;

/*
 * NOTE(review): presumably holds one of PPU_PROFILING, SPU_PROFILING_CYCLES
 * or SPU_PROFILING_EVENTS; the assignment is not visible in this part of
 * the file -- confirm.
 */
static unsigned int profiling_mode;

/*
 * Index (0 .. NUM_SPUS_PER_NODE - 1) of the SPU within each node that is
 * currently selected for SPU event profiling.  Reset to 0 by
 * cell_reg_setup_spu_events() and advanced, with wrap-around, by
 * spu_evnt_swap().
 */
static int spu_evnt_phys_spu_indx;


/*
 * Settings for one counter of one hardware thread (see pmc_cntrl[][]).
 * As filled in by cell_reg_setup_ppu():
 *   vcntr   - index of the counter
 *   evnts   - event number to count
 *   masks   - unit mask that goes with the event
 *   enabled - copy of the "enabled" setting of the counter
 */
struct pmc_cntrl_data {
	unsigned long vcntr, evnts;
	unsigned long masks, enabled;
};

/*
 * ibm,cbe-perftools rtas parameters
 */

/*
 * One signal routing entry; the physical address of an array of these is
 * handed to firmware by rtas_ibm_cbe_perftools(), so the order and the
 * types of the members must not change.
 *
 *   cpu          - Processor to modify
 *   sub_unit     - hw subunit this applies to (if applicable)
 *   signal_group - Signal Group to Enable/Disable
 *   bus_word     - Enable/Disable on this Trace/Trigger/Event
 *                  Bus Word(s) (bitmask)
 *   bit          - Trigger/Event bit (if applicable)
 */
struct pm_signal {
	u16 cpu, sub_unit;
	short signal_group;
	u8 bus_word, bit;
};

/*
 * rtas call arguments
 */
/* Values for the "subfunc" argument of rtas_ibm_cbe_perftools(). */
enum {
	SUBFUNC_RESET		= 1,
	SUBFUNC_ACTIVATE	= 2,
	SUBFUNC_DEACTIVATE	= 3,
};

/* Values for the "passthru" argument of rtas_ibm_cbe_perftools(). */
enum {
	PASSTHRU_IGNORE		= 0,
	PASSTHRU_ENABLE		= 1,
	PASSTHRU_DISABLE	= 2,
};


/*
 * Software view of the pm_control register.  write_pm_cntrl() turns these
 * members into the register value:
 *   enable, stop_at_max, freeze, trace_buf_ovflw - acted on when equal to 1
 *   trace_mode                                   - encoded when non-zero
 *   count_mode, spu_addr_trace                   - always encoded
 */
struct pm_cntrl {
	u16 enable, stop_at_max, trace_mode;
	u16 freeze, count_mode, spu_addr_trace;
	u8  trace_buf_ovflw;
};

/*
 * File-wide shadow of the performance monitor settings.
 *   group_control, debug_bus_control - bus routing bits accumulated by
 *                                      set_pm_event()
 *   pm_cntrl                         - settings encoded by write_pm_cntrl()
 *   pm07_cntrl[]                     - per-counter control word built by
 *                                      set_pm_event() and written to the
 *                                      hardware by enable_ctr()
 */
static struct {
	u32 group_control;
	u32 debug_bus_control;
	struct pm_cntrl pm_cntrl;
	u32 pm07_cntrl[NR_PHYS_CTRS];
} pm_regs;


/*
 * Field extractors for an event unit mask:
 *   bit  0      - count cycles
 *   bit  1      - polarity
 *   bit  2      - input control
 *   bits 4..7   - bus word
 *   bits 8..9   - bus type
 *   bits 12..15 - sub unit
 *
 * The argument is parenthesized so that an expression argument (for
 * example "a | b") is masked as a whole rather than only in part.
 */
#define GET_SUB_UNIT(x) (((x) & 0x0000f000) >> 12)

#define GET_BUS_WORD(x) (((x) & 0x000000f0) >> 4)

#define GET_BUS_TYPE(x) (((x) & 0x00000300) >> 8)

#define GET_POLARITY(x) (((x) & 0x00000002) >> 1)

#define GET_COUNT_CYCLES(x) ((x) & 0x00000001)

#define GET_INPUT_CONTROL(x) (((x) & 0x00000004) >> 2)

/*
 * Saved counter values.  cell_virtual_cntr() stores the counts of the
 * thread being switched out in the entry for (cpu + previous thread) and
 * reloads the counters from the entry for (cpu + next thread).
 */
static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);

/*
 * Saved value of counter 0 for every SPU, indexed by
 * node * NUM_SPUS_PER_NODE + SPU index within the node (see spu_evnt_swap()).
 */
static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];

/* Per hardware thread, per counter settings; set up by cell_reg_setup_ppu(). */
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];

/*
 * The CELL profiling code makes rtas calls to setup the debug bus to
 * route the performance signals.  Additionally, SPU profiling requires
 * a second rtas call to setup the hardware to capture the SPU PCs.
 * The EIO error value is returned if the token lookups or the rtas
 * call fail.  The EIO error number is the best choice of the existing
 * error numbers.  The probability of rtas related error is very low.  But
 * by returning EIO and printing additional information to dmesg the user
 * will know that OProfile did not start and dmesg will tell them why.
 * OProfile does not support returning errors on Stop.  Not a huge issue
 * since failure to reset the debug bus or stop the SPU PC collection is
 * not a fatal issue.  Chances are if the Stop failed, Start doesn't work
 * either.
 */

/*
 * Interpretation of hdw_thread:
 * 0 - even virtual cpus 0, 2, 4,...
 * 1 - odd virtual cpus 1, 3, 5, ...
 *
 * FIXME: this is strictly wrong, we need to clean this up in a number
 * of places. It works for now. -arnd
 */

/*
 * Hardware thread whose events are currently loaded.  Toggled between 0
 * and 1 by cell_virtual_cntr(); forced to 0 by spu_evnt_swap().
 */
static u32 hdw_thread;


/* Interrupt mask handed to cbe_enable_pm_interrupts() by cell_virtual_cntr(). */
static u32 virt_cntr_inter_mask;

/* Timer that runs cell_virtual_cntr(); armed by start_virt_cntrs(). */
static struct timer_list timer_virt_cntr;

/* Timer that runs spu_evnt_swap(); armed by start_spu_event_swap(). */
static struct timer_list timer_spu_event_swap;

/*
 * pm_signal needs to be global since it is initialized in
 * cell_reg_setup at the time when the necessary information
 * is available.
 */

static struct pm_signal pm_signal[NR_PHYS_CTRS];

/* token for debug bus setup call */
static int pm_rtas_token;

/* token for SPU cycle profiling */
static int spu_rtas_token;


/* Start value of each counter, computed as 0xFFFFFFFF - count. */
static u32 reset_value[NR_PHYS_CTRS];

/* Number of counters in use. */
static int num_counters;

/* NOTE(review): not referenced in this part of the file -- confirm its use. */
static int oprofile_running;
/*
 * Taken by cell_virtual_cntr() and spu_evnt_swap() while they reprogram
 * the counters, so that they do not race with the interrupt handler.
 */
static DEFINE_SPINLOCK(cntr_lock);


/* Bit i is set when counter i has been enabled by the setup routines. */
static u32 ctr_enabled;


/*
 * Debug bus word assigned to each input bus word by set_pm_event();
 * 0xff marks an entry that has not been assigned yet.
 */
static unsigned char input_bus[NUM_INPUT_BUS_WORDS];

/*
 * Firmware interface functions
 */

/*
 * Issue the rtas call identified by pm_rtas_token.
 *
 * @subfunc:  one of the SUBFUNC_* values
 * @passthru: one of the PASSTHRU_* values
 * @address:  kernel virtual address of the argument buffer; it is converted
 *            to a physical address and passed as a high and a low 32-bit half
 * @length:   size of the buffer in bytes
 *
 * Returns whatever rtas_call() returns.
 */
static int rtas_ibm_cbe_perftools(int subfunc, int passthru,
				  void *address, unsigned long length)
{
	const u64 phys = __pa(address);
	const u64 phys_hi = phys >> 32;
	const u64 phys_lo = phys & 0xffffffff;

	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
			 phys_hi, phys_lo, length);
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson54100.00%1100.00%
Total54100.00%1100.00%


/*
 * Reset the debug bus signal routing on @node and put the bus into the
 * passthru-disabled state.  A failure is only logged: the OProfile stop
 * path cannot report errors.
 */
static void pm_rtas_reset_signals(u32 node)
{
	/*
	 * The debug bus is being set to the passthru disable state.
	 * However, the FW still expects at least one legal signal routing
	 * entry or it will return an error on the arguments.  If we don't
	 * supply a valid entry, we must ignore all return values.  Ignoring
	 * all return values means we might miss an error we should be
	 * concerned about.
	 */
	struct pm_signal placeholder = {
		.cpu		= node,	/* fw expects physical cpu #. */
		.sub_unit	= 0,
		.signal_group	= 21,
		.bus_word	= 1,
		.bit		= 0,
	};
	int rc;

	rc = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
				    &placeholder, sizeof(placeholder));

	/*
	 * Not a fatal error.  For Oprofile stop, the oprofile
	 * functions do not support returning an error for
	 * failure to stop OProfile.
	 */
	if (unlikely(rc))
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, rc);
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson7892.86%125.00%
Bob Nelson44.76%125.00%
Adam Buchbinder11.19%125.00%
Harvey Harrison11.19%125.00%
Total84100.00%4100.00%


/*
 * Route the first @count entries of pm_signal[] onto the debug bus of @node.
 *
 * Entries that belong to the PPU cycles group are skipped, because there is
 * no debug setup required for the cycles event.  Note that only events in
 * the same group can be used; otherwise there will be conflicts in correctly
 * routing the signals on the debug bus.  It is the responsibility of the
 * OProfile user tool to check the events are in the same group.
 *
 * Returns 0 on success (including when nothing had to be routed) and -EIO
 * when the rtas call reports an error.
 */
static int pm_rtas_activate_signals(u32 node, u32 count)
{
	struct pm_signal routed[NR_PHYS_CTRS];
	int n_routed = 0;
	int src;
	int rc;

	for (src = 0; src < count; src++) {
		if (pm_signal[src].signal_group == PPU_CYCLES_GRP_NUM)
			continue;

		/* take the entry as is, except that fw expects physical cpu # */
		routed[n_routed] = pm_signal[src];
		routed[n_routed].cpu = node;
		n_routed++;
	}

	if (n_routed == 0)
		return 0;

	rc = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
				    routed,
				    n_routed * sizeof(struct pm_signal));
	if (unlikely(rc)) {
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, rc);
		return -EIO;
	}

	return 0;
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson16791.76%240.00%
Bob Nelson137.14%120.00%
Dirk Hohndel10.55%120.00%
Harvey Harrison10.55%120.00%
Total182100.00%5100.00%

/* * PM Signal functions */
/*
 * Prepare counter @ctr to count @event.
 *
 * @ctr:       index into pm_signal[] and pm_regs.pm07_cntrl[]
 * @event:     event number; event / 100 is the signal group and
 *             event % 100 is the signal bit
 * @unit_mask: packed sub unit, bus word, bus type, polarity, count-cycles
 *             and input-control fields (see the GET_* macros)
 *
 * Fills in pm_signal[ctr] and pm_regs.pm07_cntrl[ctr], and accumulates the
 * bus routing into pm_regs.debug_bus_control, pm_regs.group_control and
 * input_bus[].  Nothing is written to the hardware here.
 */
static void set_pm_event(u32 ctr, int event, u32 unit_mask)
{
	struct pm_signal *p = &pm_signal[ctr];
	u32 signal_bit;
	u32 bus_word, bus_type, count_cycles, polarity, input_control;
	/*
	 * Unsigned on purpose: "i" is shifted left by up to 30 bits below,
	 * which would move a set bit into the sign bit of a signed int.
	 */
	u32 i, j;

	if (event == PPU_CYCLES_EVENT_NUM) {
		/* Special Event: Count all cpu cycles */
		pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
		p->signal_group = PPU_CYCLES_GRP_NUM;
		p->bus_word = 1;
		p->sub_unit = 0;
		p->bit = 0;
		return;
	}

	bus_word = GET_BUS_WORD(unit_mask);
	bus_type = GET_BUS_TYPE(unit_mask);
	count_cycles = GET_COUNT_CYCLES(unit_mask);
	polarity = GET_POLARITY(unit_mask);
	input_control = GET_INPUT_CONTROL(unit_mask);
	signal_bit = (event % 100);

	p->signal_group = event / 100;
	p->bus_word = bus_word;
	p->sub_unit = GET_SUB_UNIT(unit_mask);

	pm_regs.pm07_cntrl[ctr] = 0;
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);

	/*
	 * Some of the islands signal selection is based on 64 bit words.
	 * The debug bus words are 32 bits, the input words to the performance
	 * counters are defined as 32 bits.  Need to convert the 64 bit island
	 * specification to the appropriate 32 input bit and bus word for the
	 * performance counter event selection.  See the CELL Performance
	 * monitoring signals manual and the Perf cntr hardware descriptions
	 * for the details.
	 */
	if (input_control == 0) {
		if (signal_bit > 31) {
			signal_bit -= 32;
			if (bus_word == 0x3)
				bus_word = 0x2;
			else if (bus_word == 0xc)
				bus_word = 0x8;
		}

		if ((bus_type == 0) && p->signal_group >= 60)
			bus_type = 2;
		if ((bus_type == 1) && p->signal_group >= 50)
			bus_type = 0;

		pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
	} else {
		/* the control word is cleared again in this case */
		pm_regs.pm07_cntrl[ctr] = 0;
		p->bit = signal_bit;
	}

	for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
		if (!(bus_word & (1 << i)))
			continue;

		pm_regs.debug_bus_control |= (bus_type << (30 - (2 * i)));

		/* claim the first input bus word that is still free (0xff) */
		for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
			if (input_bus[j] == 0xff) {
				input_bus[j] = i;
				pm_regs.group_control |=
					(i << (30 - (2 * j)));
				break;
			}
		}
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson42297.01%240.00%
Bob Nelson122.76%240.00%
Carl E. Love10.23%120.00%
Total435100.00%5100.00%


static void write_pm_cntrl(int cpu) { /* * Oprofile will use 32 bit counters, set bits 7:10 to 0 * pmregs.pm_cntrl is a global */ u32 val = 0; if (pm_regs.pm_cntrl.enable == 1) val |= CBE_PM_ENABLE_PERF_MON; if (pm_regs.pm_cntrl.stop_at_max == 1) val |= CBE_PM_STOP_AT_MAX; if (pm_regs.pm_cntrl.trace_mode != 0) val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); if (pm_regs.pm_cntrl.freeze == 1) val |= CBE_PM_FREEZE_ALL_CTRS; val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); /* * Routine set_count_mode must be called previously to set * the count mode based on the user selection of user and kernel. */ val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); cbe_write_pm(cpu, pm_control, val); }

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson7758.78%120.00%
Carl E. Love5239.69%360.00%
Bob Nelson21.53%120.00%
Total131100.00%5100.00%


/*
 * Select the count mode stored in pm_regs.pm_cntrl.count_mode.
 *
 * The user must specify user and kernel if they want them.  If neither
 * is specified, OProfile will count in hypervisor mode.
 *
 *   kernel  user   mode
 *   set     set    CBE_COUNT_ALL_MODES
 *   set     clear  CBE_COUNT_SUPERVISOR_MODE
 *   clear   set    CBE_COUNT_PROBLEM_MODE
 *   clear   clear  CBE_COUNT_HYPERVISOR_MODE
 */
static inline void set_count_mode(u32 kernel, u32 user)
{
	if (kernel && user)
		pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
	else if (kernel)
		pm_regs.pm_cntrl.count_mode = CBE_COUNT_SUPERVISOR_MODE;
	else if (user)
		pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
	else
		pm_regs.pm_cntrl.count_mode = CBE_COUNT_HYPERVISOR_MODE;
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson5280.00%133.33%
Carl E. Love1218.46%133.33%
Bob Nelson11.54%133.33%
Total65100.00%3100.00%


/*
 * Set the enable bit in the cached control word of counter @ctr and write
 * the resulting word to the hardware for @cpu.  The cached word in
 * @pm07_cntrl keeps the enable bit afterwards.
 */
static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
{
	const u32 word = pm07_cntrl[ctr] | CBE_PM_CTR_ENABLE;

	pm07_cntrl[ctr] = word;
	cbe_write_pm07_control(cpu, ctr, word);
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson3597.22%150.00%
Carl E. Love12.78%150.00%
Total36100.00%2100.00%

/*
 * Oprofile is expected to collect data on all CPUs simultaneously.
 * However, there is one set of performance counters per node.  There are
 * two hardware threads or virtual CPUs on each node.  Hence, OProfile must
 * multiplex in time the performance counter collection on the two virtual
 * CPUs.  The multiplexing of the performance counters is done by this
 * virtual counter routine.
 *
 * The pmc_values used below is defined as 'per-cpu' but its use is
 * more akin to 'per-node'.  We need to store two sets of counter
 * values per node -- one for the previous run and one for the next.
 * The per-cpu[NR_PHYS_CTRS] gives us the storage we need.  Each odd/even
 * pair of per-cpu arrays is used for storing the previous and next
 * pmc values for a given node.
 * NOTE: We use the per-cpu variable to improve cache performance.
 *
 * This routine will alternate loading the virtual counters for
 * virtual CPUs.
 *
 * It is the handler of timer_virt_cntr and re-arms that timer itself;
 * the "data" argument is not used.
 */
static void cell_virtual_cntr(unsigned long data)
{
	int i, prev_hdw_thread, next_hdw_thread;
	u32 cpu;
	unsigned long flags;

	/*
	 * Make sure that the interrupt handler and the virt counter are
	 * not both playing with the counters on the same node.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	prev_hdw_thread = hdw_thread;

	/* switch the cpu handling the interrupts */
	hdw_thread = 1 ^ hdw_thread;
	next_hdw_thread = hdw_thread;

	/* the routing is rebuilt from scratch by the set_pm_event() calls */
	pm_regs.group_control = 0;
	pm_regs.debug_bus_control = 0;

	/* 0xff marks every input bus word as unassigned */
	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
		input_bus[i] = 0xff;

	/*
	 * There are some per thread events.  Must do the
	 * set event, for the thread that is being started
	 */
	for (i = 0; i < num_counters; i++)
		set_pm_event(i,
			pmc_cntrl[next_hdw_thread][i].evnts,
			pmc_cntrl[next_hdw_thread][i].masks);

	/*
	 * The following is done only once per each node, but
	 * we need cpu #, not node #, to pass to the cbe_xxx functions.
	 */
	for_each_online_cpu(cpu) {
		/* only the cpu with hardware thread id 0 does the work */
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * stop counters, save counter values, restore counts
		 * for previous thread
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);
		for (i = 0; i < num_counters; i++) {
			per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
				= cbe_read_ctr(cpu, i);

			if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
			    == 0xFFFFFFFF)
				/* If the cntr value is 0xffffffff, we must
				 * reset that to 0xfffffff0 when the current
				 * thread is restarted.  This will generate a
				 * new interrupt and make sure that we never
				 * restore the counters to the max value.  If
				 * the counters were restored to the max value,
				 * they do not increment and no interrupts are
				 * generated.  Hence no more samples will be
				 * collected on that cpu.
				 */
				cbe_write_ctr(cpu, i, 0xFFFFFFF0);
			else
				cbe_write_ctr(cpu, i,
					      per_cpu(pmc_values,
						      cpu +
						      next_hdw_thread)[i]);
		}

		/*
		 * Switch to the other thread.  Change the interrupt
		 * and control regs to be scheduled on the CPU
		 * corresponding to the thread to execute.
		 */
		for (i = 0; i < num_counters; i++) {
			if (pmc_cntrl[next_hdw_thread][i].enabled) {
				/*
				 * There are some per thread events.
				 * Must do the set event, enable_cntr
				 * for each cpu.
				 */
				enable_ctr(cpu, i,
					   pm_regs.pm07_cntrl);
			} else {
				cbe_write_pm07_control(cpu, i, 0);
			}
		}

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, next_hdw_thread,
					 virt_cntr_inter_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	/* run again HZ / 10 jiffies from now, i.e. in a tenth of a second */
	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson23574.84%120.00%
Carl E. Love4012.74%240.00%
Bob Nelson3912.42%240.00%
Total314100.00%5100.00%


/*
 * Arm timer_virt_cntr so that cell_virtual_cntr() runs for the first time
 * HZ / 10 jiffies from now.  The handler is passed 0 as its data argument.
 */
static void start_virt_cntrs(void)
{
	init_timer(&timer_virt_cntr);

	timer_virt_cntr.data = 0UL;
	timer_virt_cntr.function = cell_virtual_cntr;
	timer_virt_cntr.expires = jiffies + HZ / 10;

	add_timer(&timer_virt_cntr);
}

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson42100.00%1100.00%
Total42100.00%1100.00%


static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { spu_cycle_reset = ctr[0].count; /* * Each node will need to make the rtas call to start * and stop SPU profiling. Get the token once and store it. */ spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { printk(KERN_ERR "%s: rtas token ibm,cbe-spu-perftools unknown\n", __func__); return -EIO; } return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Bob Nelson4267.74%125.00%
Maynard Johnson1625.81%125.00%
Carl E. Love34.84%125.00%
Harvey Harrison11.61%125.00%
Total62100.00%4100.00%

/*
 * Unfortunately, the hardware will only support event profiling
 * on one SPU per node at a time.  Therefore, we must time slice
 * the profiling across all SPUs in the node.  Note, we do this
 * in parallel for each node.  The following routine is called
 * periodically based on kernel timer to switch which SPU is
 * being monitored in a round-robin fashion.
 *
 * It is the handler of timer_spu_event_swap and re-arms that timer itself;
 * the "data" argument is not used.
 */
static void spu_evnt_swap(unsigned long data)
{
	int node;
	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
	unsigned long flags;
	int cpu;
	int ret;
	u32 interrupt_mask;

	/* enable interrupts on cntr 0 */
	interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);

	/* NOTE(review): written before cntr_lock is taken -- confirm intended */
	hdw_thread = 0;

	/* Make sure spu event interrupt handler and spu event swap
	 * don't access the counters simultaneously.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;

	/* advance to the next SPU, wrapping around after the last one */
	if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
		spu_evnt_phys_spu_indx = 0;

	/* all three routing entries follow the newly selected SPU */
	pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;

	/* switch the SPU being profiled on each node */
	for_each_online_cpu(cpu) {
		/* only the cpu with hardware thread id 0 does the work */
		if (cbe_get_hw_thread_id(cpu))
			continue;

		node = cbe_cpu_to_node(cpu);
		cur_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ cur_spu_evnt_phys_spu_indx;
		nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ spu_evnt_phys_spu_indx;

		/*
		 * stop counters, save counter values, restore counts
		 * for previous physical SPU
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);

		spu_pm_cnt[cur_phys_spu]
			= cbe_read_ctr(cpu, 0);

		/* restore previous count for the next spu to sample */
		/* NOTE, hardware issue, counter will not start if the
		 * counter value is at max (0xFFFFFFFF).
		 */
		if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
			cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
		else
			cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);

		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* setup the debug bus measure the one event and
		 * the two events to route the next SPU's PC on
		 * the debug bus
		 */
		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
		/* a failure is only logged; the swap carries on regardless */
		if (ret)
			printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
			       "SPU event swap\n", __func__);

		/* clear the trace buffer, don't want to take PC for
		 * previous SPU*/
		cbe_write_pm(cpu, trace_address, 0);

		enable_ctr(cpu, 0, pm_regs.pm07_cntrl);

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 interrupt_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	/* swap again HZ / 25 jiffies from now, i.e. every 1/25 of a second */
	mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
}

Contributors

PersonTokensPropCommitsCommitProp
Carl E. Love26190.94%360.00%
Maynard Johnson248.36%120.00%
Robert Richter20.70%120.00%
Total287100.00%5100.00%


/*
 * Arm timer_spu_event_swap so that spu_evnt_swap() runs for the first time
 * HZ / 25 jiffies from now.  The handler is passed 0 as its data argument.
 */
static void start_spu_event_swap(void)
{
	init_timer(&timer_spu_event_swap);

	timer_spu_event_swap.data = 0UL;
	timer_spu_event_swap.function = spu_evnt_swap;
	timer_spu_event_swap.expires = jiffies + HZ / 25;

	add_timer(&timer_spu_event_swap);
}

Contributors

PersonTokensPropCommitsCommitProp
Carl E. Love42100.00%1100.00%
Total42100.00%1100.00%


static int cell_reg_setup_spu_events(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { int i; /* routine is called once for all nodes */ spu_evnt_phys_spu_indx = 0; /* * For all events except PPU CYCLEs, each node will need to make * the rtas cbe-perftools call to setup and reset the debug bus. * Make the token lookup call once and store it in the global * variable pm_rtas_token. */ pm_rtas_token = rtas_token("ibm,cbe-perftools"); if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { printk(KERN_ERR "%s: rtas token ibm,cbe-perftools unknown\n", __func__); return -EIO; } /* setup the pm_control register settings, * settings will be written per node by the * cell_cpu_setup() function. */ pm_regs.pm_cntrl.trace_buf_ovflw = 1; /* Use the occurrence trace mode to have SPU PC saved * to the trace buffer. Occurrence data in trace buffer * is not used. Bit 2 must be set to store SPU addresses. */ pm_regs.pm_cntrl.trace_mode = 2; pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus event 2 & 3 */ /* setup the debug bus event array with the SPU PC routing events. * Note, pm_signal[0] will be filled in by set_pm_event() call below. 
*/ pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; /* Set the user selected spu event to profile on, * note, only one SPU profiling event is supported */ num_counters = 1; /* Only support one SPU event at a time */ set_pm_event(0, ctr[0].event, ctr[0].unit_mask); reset_value[0] = 0xFFFFFFFF - ctr[0].count; /* global, used by cell_cpu_setup */ ctr_enabled |= 1; /* Initialize the count for each SPU to the reset value */ for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) spu_pm_cnt[i] = reset_value[0]; return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Carl E. Love245100.00%1100.00%
Total245100.00%1100.00%


static int cell_reg_setup_ppu(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { /* routine is called once for all nodes */ int i, j, cpu; num_counters = num_ctrs; if (unlikely(num_ctrs > NR_PHYS_CTRS)) { printk(KERN_ERR "%s: Oprofile, number of specified events " \ "exceeds number of physical counters\n", __func__); return -EIO; } set_count_mode(sys->enable_kernel, sys->enable_user); /* Setup the thread 0 events */ for (i = 0; i < num_ctrs; ++i) { pmc_cntrl[0][i].evnts = ctr[i].event; pmc_cntrl[0][i].masks = ctr[i].unit_mask; pmc_cntrl[0][i].enabled = ctr[i].enabled; pmc_cntrl[0][i].vcntr = i; for_each_possible_cpu(j) per_cpu(pmc_values, j)[i] = 0; } /* * Setup the thread 1 events, map the thread 0 event to the * equivalent thread 1 event. */ for (i = 0; i < num_ctrs; ++i) { if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111)) pmc_cntrl[1][i].evnts = ctr[i].event + 19; else if (ctr[i].event == 2203) pmc_cntrl[1][i].evnts = ctr[i].event; else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215)) pmc_cntrl[1][i].evnts = ctr[i].event + 16; else pmc_cntrl[1][i].evnts = ctr[i].event; pmc_cntrl[1][i].masks = ctr[i].unit_mask; pmc_cntrl[1][i].enabled = ctr[i].enabled; pmc_cntrl[1][i].vcntr = i; } for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) input_bus[i] = 0xff; /* * Our counters count up, and "count" refers to * how much before the next interrupt, and we interrupt * on overflow. So we calculate the starting value * which will give us "count" until overflow. * Then we set the events on the enabled counters. 
*/ for (i = 0; i < num_counters; ++i) { /* start with virtual counter set 0 */ if (pmc_cntrl[0][i].enabled) { /* Using 32bit counters, reset max - count */ reset_value[i] = 0xFFFFFFFF - ctr[i].count; set_pm_event(i, pmc_cntrl[0][i].evnts, pmc_cntrl[0][i].masks); /* global, used by cell_cpu_setup */ ctr_enabled |= (1 << i); } } /* initialize the previous counts for the virtual cntrs */ for_each_online_cpu(cpu) for (i = 0; i < num_counters; ++i) { per_cpu(pmc_values, cpu)[i] = reset_value[i]; } return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Maynard Johnson39679.84%125.00%
Carl E. Love9519.15%250.00%
Bob Nelson51.01%125.00%
Total496100.00%4100.00%

/* This function is called once for all cpus combined */
static int cell_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { int ret=0; spu_cycle_reset = 0; /* initialize the spu_arr_trace value, will be reset if * doing spu event profiling. */ pm_regs.group_control = 0; pm_regs.debug_bus_control = 0; pm_regs.pm_cntrl.stop_at_max = 1; pm_regs.pm_cntrl.trace_mode = 0; pm_regs