Release 4.11 drivers/edac/skx_edac.c
/*
* EDAC driver for Intel(R) Xeon(R) Skylake processors
* Copyright (c) 2016, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include "edac_module.h"
#define SKX_REVISION " Ver: 1.0 "
/*
* Debug macros
*/
#define skx_printk(level, fmt, arg...) \
edac_printk(level, "skx", fmt, ##arg)
#define skx_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
/*
* Get a bit field at register value <v>, from bit <lo> to bit <hi>
*/
#define GET_BITFIELD(v, lo, hi) \
(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm;
#define NUM_IMC 2
/* memory controllers per socket */
#define NUM_CHANNELS 3
/* channels per memory controller */
#define NUM_DIMMS 2
/* Max DIMMS per channel */
#define MASK26 0x3FFFFFF
/* Mask for 2^26 */
#define MASK29 0x1FFFFFFF
/* Mask for 2^29 */
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
* memory controllers on the die.
*/
struct skx_dev {
struct list_head list;
u8 bus[4];
struct pci_dev *sad_all;
struct pci_dev *util_all;
u32 mcroute;
struct skx_imc {
struct mem_ctl_info *mci;
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
u8 src_id, node_id;
struct skx_channel {
struct pci_dev *cdev;
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
u8 fine_grain_bank;
u8 rowbits;
u8 colbits;
}
dimms[NUM_DIMMS];
}
chan[NUM_CHANNELS];
}
imc[NUM_IMC];
};
static int skx_num_sockets;
struct skx_pvt {
struct skx_imc *imc;
};
struct decoded_addr {
struct skx_dev *dev;
u64 addr;
int socket;
int imc;
int channel;
u64 chan_addr;
int sktways;
int chanways;
int dimm;
int rank;
int channel_rank;
u64 rank_address;
int row;
int column;
int bank_address;
int bank_group;
};
static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
{
struct skx_dev *d;
list_for_each_entry(d, &skx_edac_list, list) {
if (d->bus[idx] == bus)
return d;
}
return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 46 | 100.00% | 1 | 100.00% |
Total | 46 | 100.00% | 1 | 100.00% |
enum munittype {
CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
};
struct munit {
u16 did;
u16 devfn[NUM_IMC];
u8 busidx;
u8 per_socket;
enum munittype mtype;
};
/*
* List of PCI device ids that we need together with some device
* number and function numbers to tell which memory controller the
* device belongs to.
*/
static const struct munit skx_all_munits[] = {
{ 0x2054, { }, 1, 1, SAD_ALL },
{ 0x2055, { }, 1, 1, UTIL_ALL },
{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
{ 0x208e, { }, 1, 0, SAD },
{ }
};
/*
* We use the per-socket device 0x2016 to count how many sockets are present,
* and to detemine which PCI buses are associated with each socket. Allocate
* and build the full list of all the skx_dev structures that we need here.
*/
static int get_all_bus_mappings(void)
{
struct pci_dev *pdev, *prev;
struct skx_dev *d;
u32 reg;
int ndev = 0;
prev = NULL;
for (;;) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
if (!pdev)
break;
ndev++;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
pci_dev_put(pdev);
return -ENOMEM;
}
pci_read_config_dword(pdev, 0xCC, ®);
d->bus[0] = GET_BITFIELD(reg, 0, 7);
d->bus[1] = GET_BITFIELD(reg, 8, 15);
d->bus[2] = GET_BITFIELD(reg, 16, 23);
d->bus[3] = GET_BITFIELD(reg, 24, 31);
edac_dbg(2, "busses: %x, %x, %x, %x\n",
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &skx_edac_list);
skx_num_sockets++;
prev = pdev;
}
return ndev;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 219 | 100.00% | 1 | 100.00% |
Total | 219 | 100.00% | 1 | 100.00% |
static int get_all_munits(const struct munit *m)
{
struct pci_dev *pdev, *prev;
struct skx_dev *d;
u32 reg;
int i = 0, ndev = 0;
prev = NULL;
for (;;) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
if (!pdev)
break;
ndev++;
if (m->per_socket == NUM_IMC) {
for (i = 0; i < NUM_IMC; i++)
if (m->devfn[i] == pdev->devfn)
break;
if (i == NUM_IMC)
goto fail;
}
d = get_skx_dev(pdev->bus->number, m->busidx);
if (!d)
goto fail;
/* Be sure that the device is enabled */
if (unlikely(pci_enable_device(pdev) < 0)) {
skx_printk(KERN_ERR,
"Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
goto fail;
}
switch (m->mtype) {
case CHAN0: case CHAN1: case CHAN2:
pci_dev_get(pdev);
d->imc[i].chan[m->mtype].cdev = pdev;
break;
case SAD_ALL:
pci_dev_get(pdev);
d->sad_all = pdev;
break;
case UTIL_ALL:
pci_dev_get(pdev);
d->util_all = pdev;
break;
case SAD:
/*
* one of these devices per core, including cores
* that don't exist on this SKU. Ignore any that
* read a route table of zero, make sure all the
* non-zero values match.
*/
pci_read_config_dword(pdev, 0xB4, ®);
if (reg != 0) {
if (d->mcroute == 0)
d->mcroute = reg;
else if (d->mcroute != reg) {
skx_printk(KERN_ERR,
"mcroute mismatch\n");
goto fail;
}
}
ndev--;
break;
}
prev = pdev;
}
return ndev;
fail:
pci_dev_put(pdev);
return -ENODEV;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 320 | 100.00% | 1 | 100.00% |
Total | 320 | 100.00% | 1 | 100.00% |
static const struct x86_cpu_id skx_cpuids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 },
{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
static u8 get_src_id(struct skx_dev *d)
{
u32 reg;
pci_read_config_dword(d->util_all, 0xF0, ®);
return GET_BITFIELD(reg, 12, 14);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 36 | 100.00% | 1 | 100.00% |
Total | 36 | 100.00% | 1 | 100.00% |
static u8 skx_get_node_id(struct skx_dev *d)
{
u32 reg;
pci_read_config_dword(d->util_all, 0xF4, ®);
return GET_BITFIELD(reg, 0, 2);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 36 | 100.00% | 1 | 100.00% |
Total | 36 | 100.00% | 1 | 100.00% |
static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
int maxval, char *name)
{
u32 val = GET_BITFIELD(reg, lobit, hibit);
if (val < minval || val > maxval) {
edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
return -EINVAL;
}
return val + add;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 74 | 100.00% | 1 | 100.00% |
Total | 74 | 100.00% | 1 | 100.00% |
#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15)
#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks")
#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
static int get_width(u32 mtr)
{
switch (GET_BITFIELD(mtr, 8, 9)) {
case 0:
return DEV_X4;
case 1:
return DEV_X8;
case 2:
return DEV_X16;
}
return DEV_UNKNOWN;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 43 | 100.00% | 1 | 100.00% |
Total | 43 | 100.00% | 1 | 100.00% |
static int skx_get_hi_lo(void)
{
struct pci_dev *pdev;
u32 reg;
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
if (!pdev) {
edac_dbg(0, "Can't get tolm/tohm\n");
return -ENODEV;
}
pci_read_config_dword(pdev, 0xD0, ®);
skx_tolm = reg;
pci_read_config_dword(pdev, 0xD4, ®);
skx_tohm = reg;
pci_read_config_dword(pdev, 0xD8, ®);
skx_tohm |= (u64)reg << 32;
pci_dev_put(pdev);
edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 111 | 100.00% | 1 | 100.00% |
Total | 111 | 100.00% | 1 | 100.00% |
static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno)
{
int banks = 16, ranks, rows, cols, npages;
u64 size;
if (!IS_DIMM_PRESENT(mtr))
return 0;
ranks = numrank(mtr);
rows = numrow(mtr);
cols = numcol(mtr);
/*
* Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
*/
size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
imc->mc, chan, dimmno, size, npages,
banks, ranks, rows, cols);
imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
imc->chan[chan].dimms[dimmno].rowbits = rows;
imc->chan[chan].dimms[dimmno].colbits = cols;
dimm->nr_pages = npages;
dimm->grain = 32;
dimm->dtype = get_width(mtr);
dimm->mtype = MEM_DDR4;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 301 | 100.00% | 1 | 100.00% |
Total | 301 | 100.00% | 1 | 100.00% |
#define SKX_GET_MTMTR(dev, reg) \
pci_read_config_dword((dev), 0x87c, ®)
static bool skx_check_ecc(struct pci_dev *pdev)
{
u32 mtmtr;
SKX_GET_MTMTR(pdev, mtmtr);
return !!GET_BITFIELD(mtmtr, 2, 2);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 33 | 100.00% | 1 | 100.00% |
Total | 33 | 100.00% | 1 | 100.00% |
static int skx_get_dimm_config(struct mem_ctl_info *mci)
{
struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc;
struct dimm_info *dimm;
int i, j;
u32 mtr, amap;
int ndimms;
for (i = 0; i < NUM_CHANNELS; i++) {
ndimms = 0;
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
for (j = 0; j < NUM_DIMMS; j++) {
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i, j, 0);
pci_read_config_dword(imc->chan[i].cdev,
0x80 + 4*j, &mtr);
ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
}
if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
return -ENODEV;
}
}
return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 196 | 100.00% | 1 | 100.00% |
Total | 196 | 100.00% | 1 | 100.00% |
static void skx_unregister_mci(struct skx_imc *imc)
{
struct mem_ctl_info *mci = imc->mci;
if (!mci)
return;
edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->pdev);
edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
kfree(mci->ctl_name);
edac_mc_free(mci);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 70 | 100.00% | 1 | 100.00% |
Total | 70 | 100.00% | 1 | 100.00% |
static int skx_register_mci(struct skx_imc *imc)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct pci_dev *pdev = imc->chan[0].cdev;
struct skx_pvt *pvt;
int rc;
/* allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = NUM_DIMMS;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
sizeof(struct skx_pvt));
if (unlikely(!mci))
return -ENOMEM;
edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
/* Associate skx_dev and mci for future usage */
imc->mci = mci;
pvt = mci->pvt_info;
pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
imc->node_id, imc->lmc);
mci->mtype_cap = MEM_FLAG_DDR4;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = "skx_edac.c";
mci->dev_name = pci_name(imc->chan[0].cdev);
mci->mod_ver = SKX_REVISION;
mci->ctl_page_to_phys = NULL;
rc = skx_get_dimm_config(mci);
if (rc < 0)
goto fail;
/* record ptr to the generic device */
mci->pdev = &pdev->dev;
/* add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
rc = -EINVAL;
goto fail;
}
return 0;
fail:
kfree(mci->ctl_name);
edac_mc_free(mci);
imc->mci = NULL;
return rc;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 317 | 100.00% | 1 | 100.00% |
Total | 317 | 100.00% | 1 | 100.00% |
#define SKX_MAX_SAD 24
#define SKX_GET_SAD(d, i, reg) \
pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), ®)
#define SKX_GET_ILV(d, i, reg) \
pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), ®)
#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31)
#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27)
#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4)
#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0)
#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0)
#define SKX_ILV_TARGET(tgt) ((tgt) & 7)
static bool skx_sad_decode(struct decoded_addr *res)
{
struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
u64 addr = res->addr;
int i, idx, tgt, lchan, shift;
u32 sad, ilv;
u64 limit, prev_limit;
int remote = 0;
/* Simple sanity check for I/O space or out of range */
if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
edac_dbg(0, "Address %llx out of range\n", addr);
return false;
}
restart:
prev_limit = 0;
for (i = 0; i < SKX_MAX_SAD; i++) {
SKX_GET_SAD(d, i, sad);
limit = SKX_SAD_LIMIT(sad);
if (SKX_SAD_ENABLE(sad)) {
if (addr >= prev_limit && addr <= limit)
goto sad_found;
}
prev_limit = limit + 1;
}
edac_dbg(0, "No SAD entry for %llx\n", addr);
return false;
sad_found:
SKX_GET_ILV(d, i, ilv);
switch (SKX_SAD_INTERLEAVE(sad)) {
case 0:
idx = GET_BITFIELD(addr, 6, 8);
break;
case 1:
idx = GET_BITFIELD(addr, 8, 10);
break;
case 2:
idx = GET_BITFIELD(addr, 12, 14);
break;
case 3:
idx = GET_BITFIELD(addr, 30, 32);
break;
}
tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
/* If point to another node, find it and start over */
if (SKX_ILV_REMOTE(tgt)) {
if (remote) {
edac_dbg(0, "Double remote!\n");
return false;
}
remote = 1;
list_for_each_entry(d, &skx_edac_list, list) {
if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
goto restart;
}
edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
return false;
}
if (SKX_SAD_MOD3(sad) == 0)
lchan = SKX_ILV_TARGET(tgt);
else {
switch (SKX_SAD_MOD3MODE(sad)) {
case 0:
shift = 6;
break;
case 1:
shift = 8;
break;
case 2:
shift = 12;
break;
default:
edac_dbg(0, "illegal mod3mode\n");
return false;
}
switch (SKX_SAD_MOD3ASMOD2(sad)) {
case 0:
lchan = (addr >> shift) % 3;
break;
case 1:
lchan = (addr >> shift) % 2;
break;
case 2:
lchan = (addr >> shift) % 2;
lchan = (lchan << 1) | ~lchan;
break;
case 3:
lchan = ((addr >> shift) % 2) << 1;
break;
}
lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
}
res->dev = d;
res->socket = d->imc[0].src_id;
res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
res->addr, res->socket, res->imc, res->channel);
return true;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Tony Luck | 594 | 100.00% | 1 | 100.00% |
Total | 594 | 100.00% | 1 | 100.00% |
#define SKX_MAX_TAD 8
#define SKX_GET_TADBASE(d, mc, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), ®)
#define SKX_GET_TADWAYNESS(d, mc, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), ®)
#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg