~ljy/RK3588_XEN.git

/*
 * Copyright (C) 2010 Philippe Gerum <rpm@xenomai.org>.
 *
 * Xenomai is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * Xenomai is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xenomai; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#include <linux/types.h>
#include <linux/limits.h>
#include <linux/ctype.h>
#include <linux/jhash.h>
#include <linux/mm.h>
#include <linux/signal.h>
#include <linux/vmalloc.h>
#include <cobalt/kernel/sched.h>
#include <cobalt/kernel/heap.h>
#include <cobalt/kernel/clock.h>
#include <cobalt/kernel/ppd.h>
#include <cobalt/uapi/signal.h>
#include <asm/xenomai/syscall.h>
#include "posix/process.h"
#include "debug.h"
 
/**
 * @ingroup cobalt_core
 * @defgroup cobalt_core_debug Debugging services
 * @{
 */
/*
 * Root vfile directory for the debugging services. The "relax" vfile
 * is registered under it by init_trace_relax() when
 * CONFIG_XENO_OPT_DEBUG_TRACE_RELAX is enabled.
 */
struct xnvfile_directory cobalt_debug_vfroot;
EXPORT_SYMBOL_GPL(cobalt_debug_vfroot);
 
#ifdef CONFIG_XENO_OPT_DEBUG_TRACE_RELAX
 
/*
 * Number of buckets in the symbol hash table. Must remain a power of
 * two, since hash_symbol() selects a bucket with
 * "hash & (SYMBOL_HSLOTS - 1)".
 */
#define SYMBOL_HSLOTS    (1 << 8)
 
/*
 * An interned string. Entries are chained per hash bucket through
 * @next; the NUL-terminated text is stored inline right after the
 * header, so an entry must be allocated with room for
 * sizeof(struct hashed_symbol) + strlen(text) + 1 bytes.
 */
struct hashed_symbol {
    /* Next entry in the same hash bucket, NULL at end of chain. */
    struct hashed_symbol *next;
    /* Inline string storage (C99 flexible array member). */
    char symbol[];
};
 
/* Bucket heads of the interned symbol table, filled by hash_symbol(). */
static struct hashed_symbol *symbol_jhash[SYMBOL_HSLOTS];
 
/*
 * Private heap from which both hashed symbols and relax records are
 * allocated. It is set up over a vmalloc'ed area by init_trace_relax().
 */
static struct xnheap memory_pool;
 
/*
 * The symbol table is a permanent storage for ASCII strings, which
 * comes in handy to get a unique and constant reference to a symbol
 * while preserving storage space. Hashed symbols have infinite
 * lifetime and are never flushed.
 *
 * symbol_lock serializes lookups and insertions done by
 * hash_symbol() on that table.
 */
DEFINE_PRIVATE_XNLOCK(symbol_lock);
 
static const char *hash_symbol(const char *symbol)
{
    struct hashed_symbol *p, **h;
    const char *str;
    size_t len;
    u32 hash;
    spl_t s;
 
    len = strlen(symbol);
    hash = jhash(symbol, len, 0);
 
    xnlock_get_irqsave(&symbol_lock, s);
 
    h = &symbol_jhash[hash & (SYMBOL_HSLOTS - 1)];
    p = *h;
    while (p &&
           (*p->symbol != *symbol ||
        strcmp(p->symbol + 1, symbol + 1)))
           p = p->next;
 
    if (p)
        goto done;
 
    p = xnheap_alloc(&memory_pool, sizeof(*p) + len + 1);
    if (p == NULL) {
        str = NULL;
        goto out;
    }
 
    strcpy(p->symbol, symbol);
    p->next = *h;
    *h = p;
done:
    str = p->symbol;
out:
    xnlock_put_irqrestore(&symbol_lock, s);
 
    return str;
}
 
/*
 * We define a static limit (RELAX_SPOTNR) for spot records to limit
 * the memory consumption (record memory is pulled from memory_pool
 * by xndebug_trace_relax()). The current value should be reasonable
 * enough unless the application is extremely insane, given that we
 * only keep unique spots. Said differently, if the application has
 * more than RELAX_SPOTNR distinct code locations doing spurious
 * relaxes, then the first issue to address is likely PEBKAC.
 *
 * RELAX_HSLOTS is the number of buckets in the relax record hash
 * table; it must remain a power of two since buckets are selected
 * with "hash & (RELAX_HSLOTS - 1)".
 */
#define RELAX_SPOTNR    128
#define RELAX_HSLOTS    (1 << 8)
 
/*
 * One recorded relax location, as built by xndebug_trace_relax() and
 * dumped by relax_vfile_show().
 */
struct relax_record {
    /* Number of hits for this location */
    u32 hits;
    /*
     * Identity of the location. xndebug_trace_relax() hashes this
     * sub-structure with jhash2() and compares it with memcmp() as
     * raw memory, after having zeroed it entirely before filling it
     * in. Any change to this layout must keep that raw comparison
     * meaningful.
     */
    struct relax_spot {
        /* Faulty thread name. */
        char thread[XNOBJECT_NAME_LEN];
        /* call stack the relax originates from. */
        /* Number of valid entries in backtrace[]. */
        int depth;
        struct backtrace {
            /*
             * Program counter; made relative to the start of the
             * mapping when that mapping is file-backed.
             */
            unsigned long pc;
            /* Interned path of the backing file, NULL if unknown. */
            const char *mapname;
        } backtrace[SIGSHADOW_BACKTRACE_DEPTH];
        /* Program hash value of the caller. */
        u32 proghash;
        /* Pid of the caller. */
        pid_t pid;
        /* Reason for relaxing. */
        int reason;
    } spot;
    /* Next record in the global relax_record_list. */
    struct relax_record *r_next;
    /* Next record in the same relax_jhash bucket. */
    struct relax_record *h_next;
    /* Interned executable path of the thread, NULL if not available. */
    const char *exe_path;
};
 
/* Bucket heads of the relax record hash table, chained via h_next. */
static struct relax_record *relax_jhash[RELAX_HSLOTS];
 
/* All queued records, most recent first, chained via r_next. */
static struct relax_record *relax_record_list;
 
/*
 * relax_overall counts every call to xndebug_trace_relax() which
 * reached the accounting step since the last flush; relax_queued
 * counts the distinct records currently queued (at most RELAX_SPOTNR).
 */
static int relax_overall, relax_queued;
 
/* Protects the hash table, the record list and both counters above. */
DEFINE_PRIVATE_XNLOCK(relax_lock);
 
/*
 * The motivation to centralize tracing information about relaxes
 * directly into kernel space is fourfold:
 *
 * - this allows to gather all the trace data into a single location
 * and keep it safe there, with no external log file involved.
 *
 * - enabling the tracing does not impose any requirement on the
 * application (aside of being compiled with debug symbols for best
 * interpreting that information). We only need a kernel config switch
 * for this (i.e. CONFIG_XENO_OPT_DEBUG_TRACE_RELAX).
 *
 * - the data is collected and can be made available exactly the same
 * way regardless of the application emitting the relax requests, or
 * whether it is still alive when the trace data are displayed.
 *
 * - the kernel is able to provide accurate and detailed trace
 * information, such as the relative offset of instructions causing
 * relax requests within dynamic shared objects, without having to
 * guess it roughly from /proc/pid/maps, or relying on ldd's
 * --function-relocs feature, which both require to run on the target
 * system to get the needed information. Instead, we allow a build
 * host to use a cross-compilation toolchain later to extract the
 * source location, from the raw data the kernel has provided on the
 * target system.
 *
 * However, collecting the call frames within the application to
 * determine the full context of a relax spot is not something we can
 * do purely from kernel space, notably because it depends on build
 * options we just don't know about (e.g. frame pointers availability
 * for the app, or other nitty-gritty details depending on the
 * toolchain). To solve this, we ask the application to send us a
 * complete backtrace taken from the context of a specific signal
 * handler, which we know is stacked over the relax spot. That
 * information is then stored by the kernel after some
 * post-processing, along with other data identifying the caller, and
 * made available through the /proc/xenomai/debug/relax vfile.
 *
 * Implementation-wise, xndebug_notify_relax and xndebug_trace_relax
 * routines are paired: first, xndebug_notify_relax sends a SIGSHADOW
 * request to userland when a relax spot is detected from
 * xnthread_relax, which should then trigger a call back to
 * xndebug_trace_relax with the complete backtrace information, as
 * seen from userland (via the internal sc_cobalt_backtrace
 * syscall). All this runs on behalf of the relaxing thread, so we can
 * make a number of convenient assumptions (such as being able to scan
 * the current vma list to get detailed information about the
 * executable mappings that could be involved).
 */
 
/*
 * xndebug_notify_relax - request a backtrace from a relaxing thread.
 *
 * @thread: thread to notify.
 * @reason: relax reason code, encoded into the signal payload.
 *
 * Sends SIGSHADOW to @thread with a SIGSHADOW_ACTION_BACKTRACE
 * request carrying @reason.
 */
void xndebug_notify_relax(struct xnthread *thread, int reason)
{
    int request = sigshadow_int(SIGSHADOW_ACTION_BACKTRACE, reason);

    xnthread_signal(thread, SIGSHADOW, request);
}
 
void xndebug_trace_relax(int nr, unsigned long *backtrace,
             int reason)
{
    struct relax_record *p, **h;
    struct vm_area_struct *vma;
    struct xnthread *thread;
    struct relax_spot spot;
    struct mm_struct *mm;
    struct file *file;
    unsigned long pc;
    char *mapname;
    int n, depth;
    char *tmp;
    u32 hash;
    spl_t s;
 
    thread = xnthread_current();
    if (thread == NULL)
        return;        /* Can't be, right? What a mess. */
 
    /*
     * We compute PC values relative to the base of the shared
     * executable mappings we find in the backtrace, which makes
     * it possible for the slackspot utility to match the
     * corresponding source code locations from unrelocated file
     * offsets.
     */
 
    tmp = (char *)__get_free_page(GFP_KERNEL);
    if (tmp == NULL)
        /*
         * The situation looks really bad, but we can't do
         * anything about it. Just bail out.
         */
        return;
 
    memset(&spot, 0, sizeof(spot));
    mm = get_task_mm(current);
    mmap_read_lock(mm);
 
    for (n = 0, depth = 0; n < nr; n++) {
        pc = backtrace[n];
 
        vma = find_vma(mm, pc);
        if (vma == NULL)
            continue;
 
        /*
         * Interpreter-generated executable mappings are not
         * file-backed. Use this to determine when $pc should be fixed
         * up by subtracting the mapping base address in the DSO case.
         */
        file = vma->vm_file;
        if (file != NULL)
            pc -= vma->vm_start;
 
        spot.backtrace[depth].pc = pc;
 
        /*
         * Even in case we can't fetch the map name, we still
         * record the PC value, which may still give some hint
         * downstream.
         */
        if (file == NULL)
            goto next_frame;
 
        mapname = d_path(&file->f_path, tmp, PAGE_SIZE);
        if (IS_ERR(mapname))
            goto next_frame;
 
        spot.backtrace[depth].mapname = hash_symbol(mapname);
    next_frame:
        depth++;
    }
 
    mmap_read_unlock(mm);
    mmput(mm);
    free_page((unsigned long)tmp);
 
    /*
     * Most of the time we will be sent duplicates, since the odds
     * of seeing the same thread running the same code doing the
     * same mistake all over again are high. So we probe the hash
     * table for an identical spot first, before going for a
     * complete record allocation from the system heap if no match
     * was found. Otherwise, we just take the fast exit path.
     */
    spot.depth = depth;
    spot.proghash = thread->proghash;
    spot.pid = xnthread_host_pid(thread);
    spot.reason = reason;
    strcpy(spot.thread, thread->name);
    hash = jhash2((u32 *)&spot, sizeof(spot) / sizeof(u32), 0);
 
    xnlock_get_irqsave(&relax_lock, s);
 
    h = &relax_jhash[hash & (RELAX_HSLOTS - 1)];
    p = *h;
    while (p &&
           /* Try quick guesses first, then memcmp */
           (p->spot.depth != spot.depth ||
        p->spot.pid != spot.pid ||
        memcmp(&p->spot, &spot, sizeof(spot))))
           p = p->h_next;
 
    if (p) {
        p->hits++;
        goto out;    /* Spot already recorded. */
    }
 
    if (relax_queued >= RELAX_SPOTNR)
        goto out;    /* No more space -- ignore. */
    /*
     * We can only compete with other shadows which have just
     * switched to secondary mode like us. So holding the
     * relax_lock a bit more without disabling interrupts is not
     * an issue. This allows us to postpone the record memory
     * allocation while probing and updating the hash table in a
     * single move.
     */
    p = xnheap_alloc(&memory_pool, sizeof(*p));
    if (p == NULL)
        goto out;      /* Something is about to go wrong... */
 
    memcpy(&p->spot, &spot, sizeof(p->spot));
    p->exe_path = hash_symbol(thread->exe_path);
    p->hits = 1;
    p->h_next = *h;
    *h = p;
    p->r_next = relax_record_list;
    relax_record_list = p;
    relax_queued++;
out:
    relax_overall++;
 
    xnlock_put_irqrestore(&relax_lock, s);
}
 
/*
 * Host lock attached to the relax vfile through its .lockops. As the
 * comments in the handlers below put it, the vfile layer holds it
 * around them, which keeps ->store() from racing with the dump
 * handlers.
 */
static DEFINE_VFILE_HOSTLOCK(relax_mutex);
 
/*
 * Per-iterator state of the relax vfile, filled by
 * relax_vfile_begin() and updated by relax_vfile_next().
 */
struct relax_vfile_priv {
    /* Snapshot of relax_queued taken by ->begin(). */
    int queued;
    /* Snapshot of relax_overall taken by ->begin(). */
    int overall;
    /* Position of @curr in the dump, -1 when no record is cached. */
    int ncurr;
    /* Snapshot of relax_record_list taken by ->begin(). */
    struct relax_record *head;
    /* Last record returned to the vfile layer, if any. */
    struct relax_record *curr;
};
 
static void *relax_vfile_begin(struct xnvfile_regular_iterator *it)
{
    struct relax_vfile_priv *priv = xnvfile_iterator_priv(it);
    struct relax_record *p;
    spl_t s;
    int n;
 
    /*
     * Snapshot the counters under lock, to make sure they remain
     * mutually consistent despite we dump the record list in a
     * lock-less manner. Additionally, the vfile layer already
     * holds the relax_mutex lock for us, so that we can't race
     * with ->store().
     */
    xnlock_get_irqsave(&relax_lock, s);
 
    if (relax_queued == 0 || it->pos > relax_queued) {
        xnlock_put_irqrestore(&relax_lock, s);
        return NULL;
    }
    priv->overall = relax_overall;
    priv->queued = relax_queued;
    priv->head = relax_record_list;
 
    xnlock_put_irqrestore(&relax_lock, s);
 
    if (it->pos == 0) {
        priv->curr = NULL;
        priv->ncurr = -1;
        return VFILE_SEQ_START;
    }
 
    for (n = 1, p = priv->head; n < it->pos; n++)
        p = p->r_next;
 
    priv->curr = p;
    priv->ncurr = n;
 
    return p;
}
 
static void *relax_vfile_next(struct xnvfile_regular_iterator *it)
{
    struct relax_vfile_priv *priv = xnvfile_iterator_priv(it);
    struct relax_record *p;
    int n;
 
    if (it->pos > priv->queued)
        return NULL;
 
    if (it->pos == priv->ncurr + 1)
        p = priv->curr->r_next;
    else {
        for (n = 1, p = priv->head; n < it->pos; n++)
            p = p->r_next;
    }
 
    priv->curr = p;
    priv->ncurr = it->pos;
 
    return p;
}
 
/*
 * Printable names of the relax reasons, indexed by SIGDEBUG_* code.
 * relax_vfile_show() uses this table to print the reason stored in a
 * record. Codes without an initializer here, if any, map to NULL.
 */
static const char *reason_str[] = {
    [SIGDEBUG_UNDEFINED] = "undefined",
    [SIGDEBUG_MIGRATE_SIGNAL] = "signal",
    [SIGDEBUG_MIGRATE_SYSCALL] = "syscall",
    [SIGDEBUG_MIGRATE_FAULT] = "fault",
    [SIGDEBUG_MIGRATE_PRIOINV] = "pi-error",
    [SIGDEBUG_NOMLOCK] = "mlock-check",
    [SIGDEBUG_WATCHDOG] = "runaway-break",
    [SIGDEBUG_RESCNT_IMBALANCE] = "resource-count-imbalance",
    [SIGDEBUG_MUTEX_SLEEP] = "sleep-holding-mutex",
    [SIGDEBUG_LOCK_BREAK] = "scheduler-lock-break",
};
 
static int relax_vfile_show(struct xnvfile_regular_iterator *it, void *data)
{
    struct relax_vfile_priv *priv = xnvfile_iterator_priv(it);
    struct relax_record *p = data;
    int n;
 
    /*
     * No need to grab any lock to read a record from a previously
     * validated index: the data must be there and won't be
     * touched anymore.
     */
    if (p == NULL) {
        xnvfile_printf(it, "%d\n", priv->overall);
        return 0;
    }
 
    xnvfile_printf(it, "%s\n", p->exe_path ?: "?");
    xnvfile_printf(it, "%d %d %s %s\n", p->spot.pid, p->hits,
               reason_str[p->spot.reason], p->spot.thread);
 
    for (n = 0; n < p->spot.depth; n++)
        xnvfile_printf(it, "0x%lx %s\n",
                   p->spot.backtrace[n].pc,
                   p->spot.backtrace[n].mapname ?: "?");
 
    xnvfile_printf(it, ".\n");
 
    return 0;
}
 
static ssize_t relax_vfile_store(struct xnvfile_input *input)
{
    struct relax_record *p, *np;
    spl_t s;
 
    /*
     * Flush out all records. Races with ->show() are prevented
     * using the relax_mutex lock. The vfile layer takes care of
     * this internally.
     */
    xnlock_get_irqsave(&relax_lock, s);
    p = relax_record_list;
    relax_record_list = NULL;
    relax_overall = 0;
    relax_queued = 0;
    memset(relax_jhash, 0, sizeof(relax_jhash));
    xnlock_put_irqrestore(&relax_lock, s);
 
    while (p) {
        np = p->r_next;
        xnheap_free(&memory_pool, p);
        p = np;
    }
 
    return input->size;
}
 
/* Handlers implementing the "relax" vfile: dump on read, flush on write. */
static struct xnvfile_regular_ops relax_vfile_ops = {
    .begin = relax_vfile_begin,
    .next = relax_vfile_next,
    .show = relax_vfile_show,
    .store = relax_vfile_store,
};
 
/*
 * The "relax" vfile itself, registered by init_trace_relax(). Each
 * iterator gets a struct relax_vfile_priv as private area, and the
 * entry is protected by relax_mutex.
 */
static struct xnvfile_regular relax_vfile = {
    .privsz = sizeof(struct relax_vfile_priv),
    .ops = &relax_vfile_ops,
    .entry = { .lockops = &relax_mutex.ops },
};
 
static inline int init_trace_relax(void)
{
    u32 size = CONFIG_XENO_OPT_DEBUG_TRACE_LOGSZ * 1024;
    void *p;
    int ret;
 
    p = vmalloc(size);
    if (p == NULL)
        return -ENOMEM;
 
    ret = xnheap_init(&memory_pool, p, size);
    if (ret)
        return ret;
 
    xnheap_set_name(&memory_pool, "debug log");
 
    ret = xnvfile_init_regular("relax", &relax_vfile, &cobalt_debug_vfroot);
    if (ret) {
        xnheap_destroy(&memory_pool);
        vfree(p);
    }
 
    return ret;
}
 
/*
 * cleanup_trace_relax - tear down the relax tracing facility.
 *
 * Unregisters the "relax" vfile, destroys memory_pool and releases
 * the vmalloc'ed area backing it.
 */
static inline void cleanup_trace_relax(void)
{
    void *membase;

    xnvfile_destroy_regular(&relax_vfile);

    /* Fetch the base address before the heap is destroyed. */
    membase = xnheap_get_membase(&memory_pool);
    xnheap_destroy(&memory_pool);
    vfree(membase);
}
 
#else /* !CONFIG_XENO_OPT_DEBUG_TRACE_RELAX */
 
/*
 * Relax tracing is compiled out: nothing to set up, always
 * succeeds (returns 0).
 */
static inline int init_trace_relax(void)
{
    return 0;
}
 
/* Relax tracing is compiled out: nothing to tear down. */
static inline void cleanup_trace_relax(void)
{
}
 
/*
 * No-op placeholder for builds without relax tracing.
 * NOTE(review): there is no counterpart in the enabled branch and no
 * caller in the visible part of this file -- possibly a leftover.
 */
static inline void init_thread_relax_trace(struct xnthread *thread)
{
}
 
#endif /* !CONFIG_XENO_OPT_DEBUG_TRACE_RELAX */
 
#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING
 
/*
 * xnlock_dbg_prepare_acquire - timestamp the start of a lock attempt.
 *
 * @start: where to store the raw nkclock reading; the same location
 *         is handed to xnlock_dbg_acquired() afterwards.
 */
void xnlock_dbg_prepare_acquire(unsigned long long *start)
{
    unsigned long long now = xnclock_read_raw(&nkclock);

    *start = now;
}
EXPORT_SYMBOL_GPL(xnlock_dbg_prepare_acquire);
 
/*
 * xnlock_dbg_acquired - record who acquired a lock, and how long it
 * took.
 *
 * @lock:     lock which was just acquired.
 * @cpu:      CPU number stored as the owner's CPU.
 * @start:    timestamp set by xnlock_dbg_prepare_acquire().
 * @file:     source file of the acquisition site.
 * @line:     source line of the acquisition site.
 * @function: function name of the acquisition site.
 *
 * The spinning time is the raw nkclock time elapsed since @start.
 */
void xnlock_dbg_acquired(struct xnlock *lock, int cpu, unsigned long long *start,
             const char *file, int line, const char *function)
{
    const unsigned long long since = *start;

    /* Timing information first... */
    lock->lock_date = since;
    lock->spin_time = xnclock_read_raw(&nkclock) - since;

    /* ...then the identity of the acquisition site. */
    lock->cpu = cpu;
    lock->file = file;
    lock->line = line;
    lock->function = function;
}
EXPORT_SYMBOL_GPL(xnlock_dbg_acquired);
 
int xnlock_dbg_release(struct xnlock *lock,
               const char *file, int line, const char *function)
{
    unsigned long long lock_time;
    struct xnlockinfo *stats;
    int cpu;
 
    lock_time = xnclock_read_raw(&nkclock) - lock->lock_date;
    cpu = raw_smp_processor_id();
    stats = &per_cpu(xnlock_stats, cpu);
 
    if (lock->file == NULL) {
        lock->file = "??";
        lock->line = 0;
        lock->function = "invalid";
    }
 
    if (unlikely(lock->owner != cpu)) {
        pipeline_prepare_panic();
        printk(XENO_ERR "lock %p already unlocked on CPU #%d\n"
                "          last owner = %s:%u (%s(), CPU #%d)\n",
               lock, cpu, lock->file, lock->line, lock->function,
               lock->cpu);
        dump_stack();
        return 1;
    }
 
    /* File that we released it. */
    lock->cpu = -lock->cpu;
    lock->file = file;
    lock->line = line;
    lock->function = function;
 
    if (lock_time > stats->lock_time) {
        stats->lock_time = lock_time;
        stats->spin_time = lock->spin_time;
        stats->file = lock->file;
        stats->function = lock->function;
        stats->line = lock->line;
    }
 
    return 0;
}
EXPORT_SYMBOL_GPL(xnlock_dbg_release);
 
#endif /* CONFIG_XENO_OPT_DEBUG_LOCKING */
 
/*
 * xndebug_shadow_init - set up the debug identity of a thread.
 *
 * @thread: thread to initialize.
 *
 * Sets thread->exe_path to the executable path found in the ppd
 * returned by cobalt_ppd_get(0), or "(unknown)" if none is set, and
 * thread->proghash to the jhash of that path.
 */
void xndebug_shadow_init(struct xnthread *thread)
{
    /*
     * The caller is current, so the descriptor is expected to
     * remain valid while we are using it.
     *
     * NOTE: Kernel shadows all share the system global ppd
     * descriptor with no refcounting.
     */
    struct cobalt_ppd *ppd = cobalt_ppd_get(0);

    thread->exe_path = ppd->exe_path ?: "(unknown)";

    /*
     * The program hash is a token debug features may use to
     * identify all threads which belong to a given executable
     * file. Testing it is often handier and cheaper than
     * comparing the whole exe_path.
     */
    thread->proghash = jhash(thread->exe_path,
                 strlen(thread->exe_path), 0);
}
 
/*
 * xndebug_init - initialize the debugging services.
 *
 * Returns 0 on success, otherwise the error code received from
 * init_trace_relax().
 */
int xndebug_init(void)
{
    return init_trace_relax();
}
 
/*
 * xndebug_cleanup - release the resources held by the debugging
 * services, i.e. whatever init_trace_relax() has set up.
 */
void xndebug_cleanup(void)
{
    cleanup_trace_relax();
}
 
/** @} */