 * Copyright (C) 2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <>
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA

#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/stacktrace.h>
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <asm/sections.h>

/* Number of buckets in the dma_entry_hash[] table */
#define HASH_SIZE       1024ULL

/* Number of low-order dma address bits that hash_fn() shifts away */
#define HASH_FN_SHIFT   13

/* Mask hash_fn() applies to the shifted address to get a bucket index */
#define HASH_FN_MASK    (HASH_SIZE - 1)

enum {

enum map_err_types {


/**
 * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
 * @list: node on pre-allocated free_entries list
 * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
 * @type: single, page, sg, coherent
 * @pfn: page frame of the start address
 * @offset: offset of mapping relative to pfn
 * @dev_addr: dma address of the mapping (used as the hash key)
 * @size: length of the mapping
 * @direction: enum dma_data_direction
 * @sg_call_ents: 'nents' from dma_map_sg
 * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
 * @map_err_type: track whether dma_mapping_error() was checked
 * @stacktrace: support backtraces when a violation is detected
 * @st_entries: backing storage for the entries of @stacktrace
 */

struct dma_debug_entry {
struct list_head list;
struct device    *dev;
int              type;
unsigned long	 pfn;
size_t		 offset;
/* dma address of the mapping; hash_fn() derives the bucket index from it */
u64              dev_addr;
u64              size;
int              direction;
int		 sg_call_ents;
int		 sg_mapped_ents;
enum map_err_types  map_err_type;
struct		 stack_trace stacktrace;
/* storage that dma_entry_alloc() hands to stacktrace.entries */
unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];

/*
 * Predicate used by __hash_bucket_find(). It is invoked as
 * match(ref, entry) for every entry in a bucket; entries for which it
 * returns false are skipped.
 */
typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);

/* One slot of dma_entry_hash[]: a list of entries plus the lock guarding it */
struct hash_bucket {
/* dma_debug_entry structs linked through their 'list' member */
struct list_head list;
/* taken by get_hash_bucket(), dropped by put_hash_bucket() */
spinlock_t lock;

/* Hash list to save the allocated dma addresses; indexed by hash_fn() */

static struct hash_bucket dma_entry_hash[HASH_SIZE];
/* List of pre-allocated dma_debug_entry's that are currently unused */
static LIST_HEAD(free_entries);
/* Lock for the list above; held whenever free_entries is modified */
static DEFINE_SPINLOCK(free_entries_lock);

/*
 * Global disable flag - will be set in case of an error, e.g. when the
 * pool of free entries is exhausted or cacheline tracking hits -ENOMEM
 */

static bool global_disable __read_mostly;

/*
 * Early initialization disable flag, set at the end of dma_debug_init;
 * while it is false dma_debug_disabled() reports the checks as disabled
 */

static bool dma_debug_initialized __read_mostly;

static inline bool dma_debug_disabled(void) { return global_disable || !dma_debug_initialized; }


/*
 * Print the stack trace recorded for @entry, if any. Compiles to an empty
 * function when CONFIG_STACKTRACE is not set; a NULL @entry is ignored.
 */
static inline void dump_entry_trace(struct dma_debug_entry *entry)
{
#ifdef CONFIG_STACKTRACE
	if (!entry)
		return;

	pr_warning("Mapped at:\n");
	print_stack_trace(&entry->stacktrace, 0);
#endif
}


static bool driver_filter(struct device *dev) { struct device_driver *drv; unsigned long flags; bool ret; /* driver filter off */ if (likely(!current_driver_name[0])) return true; /* driver filter on and initialized */ if (current_driver && dev && dev->driver == current_driver) return true; /* driver filter on, but we can't filter on a NULL device... */ if (!dev) return false; if (current_driver || !current_driver_name[0]) return false; /* driver filter on but not yet initialized */ drv = dev->driver; if (!drv) return false; /* lock to protect against change of current_driver_name */ read_lock_irqsave(&driver_name_lock, flags); ret = false; if (drv->name && strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { current_driver = drv; ret = true; } read_unlock_irqrestore(&driver_name_lock, flags); return ret; }


/*
 * err_printk() - count and, if permitted, report a DMA-API violation
 * @dev:    device the violation belongs to (may be NULL)
 * @entry:  dma_debug_entry whose stack trace is dumped (may be NULL)
 * @format: printk-style format string, followed by its arguments
 *
 * error_count is always incremented. The warning itself is only emitted
 * when driver_filter() accepts @dev and either show_all_errors is set or
 * show_num_errors is still positive; in the latter mode show_num_errors is
 * decremented once per call.
 *
 * The macro expands to a single statement WITHOUT a trailing semicolon so
 * that "if (x) err_printk(...); else ..." parses correctly. Note that @dev
 * is evaluated more than once.
 */
#define err_printk(dev, entry, format, arg...) do {			\
		error_count += 1;					\
		if (driver_filter(dev) &&				\
		    (show_all_errors || show_num_errors > 0)) {		\
			WARN(1, "%s %s: " format,			\
			     dev ? dev_driver_string(dev) : "NULL",	\
			     dev ? dev_name(dev) : "NULL", ## arg);	\
			dump_entry_trace(entry);			\
		}							\
		if (!show_all_errors && show_num_errors > 0)		\
			show_num_errors -= 1;				\
	} while (0)

/*
 * Hash related functions
 *
 * Every DMA-API request is saved into a struct dma_debug_entry. To
 * have quick access to these structs they are stored into a hash.
 */
static int hash_fn(struct dma_debug_entry *entry) { /* * Hash function is based on the dma address. * We use bits 20-27 here as the index into the hash */ return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; }


/*
 * Look up the hash bucket that @entry hashes to and lock it.
 *
 * The bucket lock is taken with interrupts disabled; the saved interrupt
 * state is stored in *@flags so it can be handed to put_hash_bucket().
 * Returns the locked bucket.
 */
static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
					   unsigned long *flags)
	__acquires(&dma_entry_hash[idx].lock)
{
	int idx = hash_fn(entry);
	struct hash_bucket *bucket = &dma_entry_hash[idx];
	unsigned long irq_state;

	spin_lock_irqsave(&bucket->lock, irq_state);
	*flags = irq_state;

	return bucket;
}

/*
 * Give up exclusive access to the hash bucket
 */
static void put_hash_bucket(struct hash_bucket *bucket,
			    unsigned long *flags)
	__releases(&bucket->lock)
{
	/* *flags holds the interrupt state stored by get_hash_bucket() */
	spin_unlock_irqrestore(&bucket->lock, *flags);
}
/* match_fn: true when @a and @b have the same dma address and device */
static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
{
	if (a->dev_addr != b->dev_addr)
		return false;

	return a->dev == b->dev;
}


static bool containing_match(struct dma_debug_entry *a, struct dma_debug_entry *b) { if (a->dev != b->dev) return false; if ((b->dev_addr <= a->dev_addr) && ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) return true; return false; }


/*
 * Search a given entry in the hash bucket list
 *
 * Every entry accepted by @match is scored against @ref on four
 * attributes (size, type, direction, sg_call_ents). An entry that agrees
 * on all four is returned immediately. Otherwise a result is only
 * returned when exactly one entry matched; several imperfect matches
 * yield NULL.
 */
static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
						  struct dma_debug_entry *ref,
						  match_fn match)
{
	struct dma_debug_entry *cur, *best = NULL;
	int nr_matches = 0, best_score = -1;

	list_for_each_entry(cur, &bucket->list, list) {
		int score = 0;

		if (!match(ref, cur))
			continue;

		/*
		 * Some drivers map the same physical address multiple
		 * times. Without a hardware IOMMU this results in the
		 * same device addresses being put into the dma-debug
		 * hash multiple times too. This can result in false
		 * positives being reported. Therefore we implement a
		 * best-fit algorithm here which returns the entry from
		 * the hash which fits best to the reference value
		 * instead of the first-fit.
		 */
		nr_matches++;

		if (cur->size == ref->size)
			score++;
		if (cur->type == ref->type)
			score++;
		if (cur->direction == ref->direction)
			score++;
		if (cur->sg_call_ents == ref->sg_call_ents)
			score++;

		/* perfect-fit - return the result */
		if (score == 4)
			return cur;

		/* better than anything seen so far, or the 1st match */
		if (score > best_score) {
			best_score = score;
			best = cur;
		}
	}

	/*
	 * If we have multiple matches but no perfect-fit, just return
	 * NULL.
	 */
	if (nr_matches != 1)
		return NULL;

	return best;
}


static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, struct dma_debug_entry *ref) { return __hash_bucket_find(bucket, ref, exact_match); }


static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, struct dma_debug_entry *ref, unsigned long *flags) { unsigned int max_range = dma_get_max_seg_size(ref->dev); struct dma_debug_entry *entry, index = *ref; unsigned int range = 0; while (range <= max_range) { entry = __hash_bucket_find(*bucket, ref, containing_match); if (entry) return entry; /* * Nothing found, go back a hash bucket */ put_hash_bucket(*bucket, flags); range += (1 << HASH_FN_SHIFT); index.dev_addr -= (1 << HASH_FN_SHIFT); *bucket = get_hash_bucket(&index, flags); } return NULL; }


/*
 * Append @entry to the tail of @bucket's list. No locking is done here.
 */
static void hash_bucket_add(struct hash_bucket *bucket,
			    struct dma_debug_entry *entry)
{
	struct list_head *node = &entry->list;

	list_add_tail(node, &bucket->list);
}


/*
 * Unlink @entry from the hash bucket list it is on. No locking is done
 * here.
 */
static void hash_bucket_del(struct dma_debug_entry *entry)
{
	struct list_head *node = &entry->list;

	list_del(node);
}


/* Physical address of the mapping: start of the entry's page plus offset */
static unsigned long long phys_addr(struct dma_debug_entry *entry)
{
	struct page *page = pfn_to_page(entry->pfn);

	return page_to_phys(page) + entry->offset;
}


/*
 * Dump mapping entries for debugging purposes
 *
 * Walks every hash bucket under its lock and logs one line per entry.
 * When @dev is non-NULL only entries belonging to that device are shown.
 */
void debug_dma_dump_mappings(struct device *dev)
{
	int idx;

	for (idx = 0; idx < HASH_SIZE; idx++) {
		struct hash_bucket *slot = &dma_entry_hash[idx];
		struct dma_debug_entry *cur;
		unsigned long irq_flags;

		spin_lock_irqsave(&slot->lock, irq_flags);

		list_for_each_entry(cur, &slot->list, list) {
			/* a device filter is set and this entry is not ours */
			if (dev && dev != cur->dev)
				continue;

			dev_info(cur->dev,
				 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
				 type2name[cur->type], idx,
				 phys_addr(cur), cur->pfn,
				 cur->dev_addr, cur->size,
				 dir2name[cur->direction],
				 maperr2str[cur->map_err_type]);
		}

		spin_unlock_irqrestore(&slot->lock, irq_flags);
	}
}


EXPORT_SYMBOL(debug_dma_dump_mappings);

/*
 * For each mapping (initial cacheline in the case of
 * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
 * scatterlist, or the cacheline specified in dma_map_single) insert
 * into this tree using the cacheline as the key. At
 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
 * the entry already exists at insertion time add a tag as a reference
 * count for the overlapping mappings. For now, the overlap tracking
 * just ensures that 'unmaps' balance 'maps' before marking the
 * cacheline idle, but we should also be flagging overlaps as an API
 * violation.
 *
 * Memory usage is mostly constrained by the maximum number of available
 * dma-debug entries in that we need a free dma_debug_entry before
 * inserting into the tree. In the case of dma_map_page and
 * dma_alloc_coherent there is only one dma_debug_entry and one
 * dma_active_cacheline entry to track per event. dma_map_sg(), on the
 * other hand, consumes a single dma_debug_entry, but inserts 'nents'
 * entries into the tree.
 *
 * At any time debug_dma_assert_idle() can be called to trigger a
 * warning if any cachelines in the given page are in the active set.
 */
static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
/* Held around the dma_active_cacheline accesses in the helpers below */
static DEFINE_SPINLOCK(radix_lock);
/* Largest overlap count that fits when each radix-tree tag stores one bit */
#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
/* Shift that converts a page frame number into a cacheline number */
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
/* Number of cachelines that make up one page */
#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) { return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + (entry->offset >> L1_CACHE_SHIFT); }


/*
 * Read the overlap counter of cacheline @cln. The counter is stored in
 * the radix-tree tags of the slot, one bit per tag.
 */
static int active_cacheline_read_overlap(phys_addr_t cln)
{
	int tag, count = 0;

	for (tag = RADIX_TREE_MAX_TAGS - 1; tag >= 0; tag--) {
		if (!radix_tree_tag_get(&dma_active_cacheline, cln, tag))
			continue;

		count |= 1 << tag;
	}

	return count;
}


/*
 * Store @overlap as the overlap counter of cacheline @cln by setting or
 * clearing one radix-tree tag per bit. Values outside
 * [0, ACTIVE_CACHELINE_MAX_OVERLAP] are not stored. Returns @overlap
 * unchanged in every case so callers can detect out-of-range values.
 */
static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
{
	int tag;

	if (overlap < 0 || overlap > ACTIVE_CACHELINE_MAX_OVERLAP)
		return overlap;

	for (tag = RADIX_TREE_MAX_TAGS - 1; tag >= 0; tag--) {
		if (overlap & (1 << tag))
			radix_tree_tag_set(&dma_active_cacheline, cln, tag);
		else
			radix_tree_tag_clear(&dma_active_cacheline, cln, tag);
	}

	return overlap;
}


/*
 * Bump the overlap counter of cacheline @cln by one and warn (once) when
 * the new value no longer fits into the tag bits.
 */
static void active_cacheline_inc_overlap(phys_addr_t cln)
{
	int wanted = active_cacheline_read_overlap(cln) + 1;
	int overlap = active_cacheline_set_overlap(cln, wanted);

	/* If we overflowed the overlap counter then we're potentially
	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
	 * balanced then this overflow may cause false negatives in
	 * debug_dma_assert_idle() as the cacheline may be marked idle
	 * prematurely.
	 */
	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}


/*
 * Drop the overlap counter of cacheline @cln by one. Returns the new
 * value, which is negative when the counter was already zero (nothing is
 * stored in that case).
 */
static int active_cacheline_dec_overlap(phys_addr_t cln)
{
	int remaining = active_cacheline_read_overlap(cln) - 1;

	return active_cacheline_set_overlap(cln, remaining);
}


static int active_cacheline_insert(struct dma_debug_entry *entry) { phys_addr_t cln = to_cacheline_number(entry); unsigned long flags; int rc; /* If the device is not writing memory then we don't have any * concerns about the cpu consuming stale data. This mitigates * legitimate usages of overlapping mappings. */ if (entry->direction == DMA_TO_DEVICE) return 0; spin_lock_irqsave(&radix_lock, flags); rc = radix_tree_insert(&dma_active_cacheline, cln, entry); if (rc == -EEXIST) active_cacheline_inc_overlap(cln); spin_unlock_irqrestore(&radix_lock, flags); return rc; }


static void active_cacheline_remove(struct dma_debug_entry *entry) { phys_addr_t cln = to_cacheline_number(entry); unsigned long flags; /* ...mirror the insert case */ if (entry->direction == DMA_TO_DEVICE) return; spin_lock_irqsave(&radix_lock, flags); /* since we are counting overlaps the final put of the * cacheline will occur when the overlap count is 0. * active_cacheline_dec_overlap() returns -1 in that case */ if (active_cacheline_dec_overlap(cln) < 0) radix_tree_delete(&dma_active_cacheline, cln); spin_unlock_irqrestore(&radix_lock, flags); }


/** * debug_dma_assert_idle() - assert that a page is not undergoing dma * @page: page to lookup in the dma_active_cacheline tree * * Place a call to this routine in cases where the cpu touching the page * before the dma completes (page is dma_unmapped) will lead to data * corruption. */
void debug_dma_assert_idle(struct page *page) { static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; struct dma_debug_entry *entry = NULL; void **results = (void **) &ents; unsigned int nents, i; unsigned long flags; phys_addr_t cln; if (dma_debug_disabled()) return; if (!page) return; cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; spin_lock_irqsave(&radix_lock, flags); nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, CACHELINES_PER_PAGE); for (i = 0; i < nents; i++) { phys_addr_t ent_cln = to_cacheline_number(ents[i]); if (ent_cln == cln) { entry = ents[i]; break; } else if (ent_cln >= cln + CACHELINES_PER_PAGE) break; } spin_unlock_irqrestore(&radix_lock, flags); if (!entry) return; cln = to_cacheline_number(entry); err_printk(entry->dev, entry, "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", &cln); }


/*
 * Wrapper function for adding an entry to the hash.
 * This function takes care of locking itself.
 *
 * The entry is also registered for cacheline tracking; if that fails with
 * -ENOMEM, dma-debug is disabled globally.
 */
static void add_dma_entry(struct dma_debug_entry *entry)
{
	struct hash_bucket *slot;
	unsigned long irq_flags;

	slot = get_hash_bucket(entry, &irq_flags);
	hash_bucket_add(slot, entry);
	put_hash_bucket(slot, &irq_flags);

	/* TODO: report -EEXIST errors here as overlapping mappings are
	 * not supported by the DMA API
	 */
	if (active_cacheline_insert(entry) != -ENOMEM)
		return;

	pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
	global_disable = true;
}


static struct dma_debug_entry *__dma_entry_alloc(void) { struct dma_debug_entry *entry; entry = list_entry(, struct dma_debug_entry, list); list_del(&entry->list); memset(entry, 0, sizeof(*entry)); num_free_entries -= 1; if (num_free_entries < min_free_entries) min_free_entries = num_free_entries; return entry; }


/* struct dma_entry allocator * * The next two functions implement the allocator for * struct dma_debug_entries. */
static struct dma_debug_entry *dma_entry_alloc(void) { struct dma_debug_entry *entry; unsigned long flags; spin_lock_irqsave(&free_entries_lock, flags); if (list_empty(&free_entries)) { global_disable = true; spin_unlock_irqrestore(&free_entries_lock, flags); pr_err("DMA-API: debugging out of memory - disabling\n"); return NULL; } entry = __dma_entry_alloc(); spin_unlock_irqrestore(&free_entries_lock, flags); #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; entry->stacktrace.entries = entry->st_entries; entry->stacktrace.skip = 2; save_stack_trace(&entry->stacktrace); #endif return entry; }


/*
 * Return @entry to the pool of free entries after removing it from the
 * active cacheline tracking.
 */
static void dma_entry_free(struct dma_debug_entry *entry)
{
	unsigned long irq_flags;

	active_cacheline_remove(entry);

	spin_lock_irqsave(&free_entries_lock, irq_flags);

	/*
	 * add to beginning of the list - this way the entries are
	 * more likely cache hot when they are reallocated.
	 */
	list_add(&entry->list, &free_entries);
	num_free_entries += 1;

	spin_unlock_irqrestore(&free_entries_lock, irq_flags);
}


/*
 * Grow or shrink the pool of dma_debug_entry structs so that the total
 * number of entries becomes @num_entries.
 *
 * Growing allocates with GFP_KERNEL while free_entries_lock is dropped;
 * shrinking can only release entries that are currently on the free
 * list.
 *
 * Returns 0 when the requested size was reached, 1 otherwise.
 */
int dma_debug_resize_entries(u32 num_entries)
{
	struct dma_debug_entry *entry;
	unsigned long irq_flags;
	int done, delta, ret;
	LIST_HEAD(fresh);

	spin_lock_irqsave(&free_entries_lock, irq_flags);

	if (nr_total_entries < num_entries) {
		delta = num_entries - nr_total_entries;

		/* GFP_KERNEL allocations may sleep: drop the lock meanwhile */
		spin_unlock_irqrestore(&free_entries_lock, irq_flags);

		done = 0;
		while (done < delta) {
			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
			if (!entry)
				break;

			list_add_tail(&entry->list, &fresh);
			done++;
		}

		spin_lock_irqsave(&free_entries_lock, irq_flags);

		list_splice(&fresh, &free_entries);
		nr_total_entries += done;
		num_free_entries += done;
	} else {
		delta = nr_total_entries - num_entries;

		done = 0;
		while (done < delta && !list_empty(&free_entries)) {
			entry = __dma_entry_alloc();
			kfree(entry);
			done++;
		}

		nr_total_entries -= done;
	}

	ret = (nr_total_entries != num_entries) ? 1 : 0;

	spin_unlock_irqrestore(&free_entries_lock, irq_flags);

	return ret;
}
EXPORT_SYMBOL(dma_debug_resize_entries);

/*
 * DMA-API debugging init code
 *
 * The init code does two things:
 *   1. Initialize core data structures
 *   2. Preallocate a given number of dma_debug_entry structs
 */
/*
 * Pre-allocate @num_entries dma_debug_entry structs and put them on the
 * free_entries list.
 *
 * On success num_free_entries and min_free_entries are both set to
 * @num_entries and 0 is returned. If any allocation fails, every entry on
 * free_entries is released again and -ENOMEM is returned.
 */
static int prealloc_memory(u32 num_entries)
{
	struct dma_debug_entry *entry, *next_entry;
	/* same type as num_entries: avoids a signed/unsigned comparison */
	u32 i;

	for (i = 0; i < num_entries; ++i) {
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			goto out_err;

		list_add_tail(&entry->list, &free_entries);
	}

	num_free_entries = num_entries;
	min_free_entries = num_entries;

	/* num_entries is unsigned, so print it with %u */
	pr_info("DMA-API: preallocated %u debug entries\n", num_entries);

	return 0;

out_err:

	list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
		list_del(&entry->list);
		kfree(entry);
	}

	return -ENOMEM;
}


/*
 * Read handler that returns the current driver filter name followed by a
 * newline. Returns 0 (nothing to read) when no filter is set, otherwise
 * the result of simple_read_from_buffer().
 */
static ssize_t filter_read(struct file *file, char __user *user_buf,
			   size_t count, loff_t *ppos)
{
	char tmp[NAME_MAX_LEN + 1];
	unsigned long irq_flags;
	int nbytes;

	if (!current_driver_name[0])
		return 0;

	/*
	 * We can't copy to userspace directly because current_driver_name can
	 * only be read under the driver_name_lock with irqs disabled. So
	 * create a temporary copy first.
	 */
	read_lock_irqsave(&driver_name_lock, irq_flags);
	nbytes = scnprintf(tmp, sizeof(tmp), "%s\n", current_driver_name);
	read_unlock_irqrestore(&driver_name_lock, irq_flags);

	return simple_read_from_buffer(user_buf, count, ppos, tmp, nbytes);
}


static ssize_t filter_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { char buf[NAME_MAX_LEN]; unsigned long flags; size_t len; int i; /* * We can't copy from userspace directly. Access to * current_driver_name is protected with a write_lock with irqs * disabled. Since copy_from_user can fault and may sleep we * need to copy to temporary buffer first */ len = min(count, (size_t)(NAME_MAX_LEN - 1)); if (copy_from_user(buf, userbuf, len)) return -EFAULT; buf[len] = 0; write_lock_irqsave(&driver_name_lock, flags); /* * Now handle the string we got from userspace very carefully. * The rules are: * - only use the first token we got * - token delimiter is everything looking like a space * character (' ', '\n', '\t' ...) * */ if (!isalnum(buf[0])