/*
 *
 * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include <mali_kbase.h>
#include <linux/spinlock.h>
#include <mali_kbase_hwaccess_jm.h>

#ifdef CONFIG_DEBUG_FS

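/* Check, under the job fault event lock, whether any fault event is
 * queued on the device-wide list. Used as the wake-up condition for a
 * reader blocked in kbase_job_fault_event_wait().
 */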
static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	ret = !list_empty(event_list);
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	return ret;
}

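/* Return true when no queued fault event belongs to this context, i.e.
 * the device-wide list is empty or every entry comes from another
 * context. Used by the resume worker to detect that this context's
 * dump has completed.
 */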
static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
	struct base_job_fault_event *event;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	if (list_empty(event_list)) {
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		return true;
	}
	list_for_each_entry(event, event_list, head) {
		if (event->katom->kctx == kctx) {
			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
					flags);
			return false;
		}
	}
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
	return true;
}

/* Wait until a fault happens, then copy the event */
static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
		struct base_job_fault_event *event)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	struct base_job_fault_event *event_in;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	if (list_empty(event_list)) {
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		if (wait_event_interruptible(kbdev->job_fault_wq,
				kbase_is_job_fault_event_pending(kbdev)))
			return -ERESTARTSYS;
		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	}

	event_in = list_entry(event_list->next,
			struct base_job_fault_event, head);
	event->event_code = event_in->event_code;
	event->katom = event_in->katom;

	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	return 0;
}

/* Remove and return the event at the head of the queue */
static struct base_job_fault_event *kbase_job_fault_event_dequeue(
		struct kbase_device *kbdev, struct list_head *event_list)
{
	struct base_job_fault_event *event;

	event = list_entry(event_list->next,
			struct base_job_fault_event, head);
	list_del(event_list->next);

	return event;
}

/* Remove all the atoms that follow the failed atom in the same context,
 * calling the postponed bottom half of job done for each of them.
 * After that, this context can be rescheduled.
 */
static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
{
	struct list_head *event_list = &kctx->job_fault_resume_event_list;

	while (!list_empty(event_list)) {
		struct base_job_fault_event *event;

		event = kbase_job_fault_event_dequeue(kctx->kbdev,
				&kctx->job_fault_resume_event_list);
		kbase_jd_done_worker(&event->katom->work);
	}
}

/* Remove all the failed atoms, which may belong to different contexts,
 * and resume every context that was suspended because of a failed job.
 */
static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	while (!list_empty(event_list)) {
		kbase_job_fault_event_dequeue(kbdev, event_list);
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		wake_up(&kbdev->job_fault_resume_wq);
		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	}
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
}

static void kbase_job_fault_resume_worker(struct work_struct *data)
{
	struct base_job_fault_event *event = container_of(data,
			struct base_job_fault_event, job_fault_work);
	struct kbase_context *kctx;
	struct kbase_jd_atom *katom;

	katom = event->katom;
	kctx = katom->kctx;

	dev_info(kctx->kbdev->dev, "Job dumping wait\n");

	/* When woken up, check whether the queue is empty or the failed
	 * atom at its head belongs to a different context. Either case
	 * means the failed job has been dumped, so stop waiting. Note
	 * that job_fault_event_list should never contain two atoms that
	 * belong to the same context.
	 */
	wait_event(kctx->kbdev->job_fault_resume_wq,
			kbase_ctx_has_no_event_pending(kctx));

	atomic_set(&kctx->job_fault_count, 0);
	kbase_jd_done_worker(&katom->work);

	/* Atoms scheduled while the failed job was being dumped had their
	 * job_done_worker held back; rerun it for them now that the dump
	 * has finished.
	 */
	kbase_job_fault_resume_event_cleanup(kctx);

	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
}

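/* Record the fault in the atom's embedded fault_event and append it to
 * the given event list; any locking is the caller's responsibility.
 */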
static struct base_job_fault_event *kbase_job_fault_event_queue(
		struct list_head *event_list,
		struct kbase_jd_atom *atom,
		u32 completion_code)
{
	struct base_job_fault_event *event;

	event = &atom->fault_event;

	event->katom = atom;
	event->event_code = completion_code;

	list_add_tail(&event->head, event_list);

	return event;
}

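/* Queue the fault on the device-wide list, wake any debugfs reader
 * waiting for an event, and schedule the resume worker that holds the
 * atom's job-done bottom half until the dump has been read.
 */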
static void kbase_job_fault_event_post(struct kbase_device *kbdev,
		struct kbase_jd_atom *katom, u32 completion_code)
{
	struct base_job_fault_event *event;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
			katom, completion_code);
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	wake_up_interruptible(&kbdev->job_fault_wq);

	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);

	dev_info(katom->kctx->kbdev->dev, "Job fault occurred, start dump: %d_%d",
			katom->kctx->tgid, katom->kctx->id);
}

/*
 * Process the job fault:
 * get a copy of the registers,
 * post the failed-job dump event, and
 * queue the work that waits until the job dump has finished.
 */
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
		u32 completion_code)
{
	struct kbase_context *kctx = katom->kctx;

	/* Check whether a dump is already in progress: only one atom per
	 * context can be dumped at a time. An atom from a different
	 * context can still be dumped.
	 */
	if (atomic_read(&kctx->job_fault_count) > 0) {
		kbase_job_fault_event_queue(
				&kctx->job_fault_resume_event_list,
				katom, completion_code);
		dev_info(kctx->kbdev->dev, "queue:%d\n",
				kbase_jd_atom_id(kctx, katom));
		return true;
	}

	if (kctx->kbdev->job_fault_debug) {

		if (completion_code != BASE_JD_EVENT_DONE) {

			if (!kbase_job_fault_get_reg_snapshot(kctx)) {
				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
				return false;
			}

			kbase_job_fault_event_post(kctx->kbdev, katom,
					completion_code);
			atomic_inc(&kctx->job_fault_count);
			dev_info(kctx->kbdev->dev, "post:%d\n",
					kbase_jd_atom_id(kctx, katom));
			return true;

		}
	}
	return false;
}

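/* The debugfs "job_fault" file is implemented as a seq_file: start()
 * blocks until a fault event is available, show() emits the captured
 * register address/value pairs in chunks, next() keeps returning the
 * same event until the termination flag is reached, and stop()
 * releases the temporary event copy and, once the dump is complete,
 * dequeues the real event and wakes the resume worker.
 */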
static int debug_job_fault_show(struct seq_file *m, void *v)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
	struct kbase_context *kctx = event->katom->kctx;
	int i;

	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
			kctx->tgid, kctx->id, event->reg_offset);

	if (kctx->reg_dump == NULL) {
		dev_warn(kbdev->dev, "reg dump is NULL");
		return -1;
	}

	if (kctx->reg_dump[event->reg_offset] ==
			REGISTER_DUMP_TERMINATION_FLAG) {
		/* Return an error here to stop the read; next() will not
		 * be called again, and stop() can then pick up the real
		 * event resource and release it.
		 */
		return -1;
	}

	if (event->reg_offset == 0)
		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);

	/* Emit up to 50 register address/value pairs per show() call */
	for (i = 0; i < 50; i++) {
		if (kctx->reg_dump[event->reg_offset] ==
				REGISTER_DUMP_TERMINATION_FLAG) {
			break;
		}
		seq_printf(m, "%08x: %08x\n",
				kctx->reg_dump[event->reg_offset],
				kctx->reg_dump[1+event->reg_offset]);
		event->reg_offset += 2;
	}

	return 0;
}

static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event = (struct base_job_fault_event *)v;

	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
			event->reg_offset, (int)*pos);

	return event;
}

static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event;

	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);

	/* The condition here is subtle: it must ensure either that the
	 * fault hasn't happened and dumping hasn't started yet, or that
	 * the dumping has finished.
	 */
	if (*pos == 0) {
		event = kmalloc(sizeof(*event), GFP_KERNEL);
		if (!event)
			return NULL;
		event->reg_offset = 0;
		if (kbase_job_fault_event_wait(kbdev, event)) {
			kfree(event);
			return NULL;
		}

		/* The cache flush workaround is normally run in the bottom
		 * half of job done, but that has been delayed here. Clean
		 * the cache now so that the GPU memory dump is correct.
		 */
		kbase_backend_cacheclean(kbdev, event->katom);
	} else
		return NULL;

	return event;
}

static void debug_job_fault_stop(struct seq_file *m, void *v)
{
	struct kbase_device *kbdev = m->private;

	/* We wake up kbase_jd_done_worker only after stop: the debug
	 * daemon needs to take the memory dump before the register dump,
	 * otherwise the memory dump may be incorrect.
	 */

	if (v != NULL) {
		kfree(v);
		dev_info(kbdev->dev, "debug job fault seq stop stage 1");

	} else {
		unsigned long flags;

		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
		if (!list_empty(&kbdev->job_fault_event_list)) {
			kbase_job_fault_event_dequeue(kbdev,
				&kbdev->job_fault_event_list);
			wake_up(&kbdev->job_fault_resume_wq);
		}
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
	}
}

static const struct seq_operations ops = {
	.start = debug_job_fault_start,
	.next = debug_job_fault_next,
	.stop = debug_job_fault_stop,
	.show = debug_job_fault_show,
};

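/* Opening the "job_fault" debugfs file enables job fault debugging for
 * the device; releasing it disables debugging and cleans up any
 * unprocessed events so that suspended contexts can be rescheduled.
 */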
static int debug_job_fault_open(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev = in->i_private;
	int ret;

	ret = seq_open(file, &ops);
	if (ret)
		return ret;

	((struct seq_file *)file->private_data)->private = kbdev;
	dev_info(kbdev->dev, "debug job fault seq open");

	kbdev->job_fault_debug = true;

	return 0;
}

static int debug_job_fault_release(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev = in->i_private;

	seq_release(in, file);

	kbdev->job_fault_debug = false;

	/* Clean up any unprocessed job fault events so that all the
	 * suspended contexts can be rescheduled.
	 */
	kbase_job_fault_event_cleanup(kbdev);

	dev_info(kbdev->dev, "debug job fault seq close");

	return 0;
}

static const struct file_operations kbasep_debug_job_fault_fops = {
	.open = debug_job_fault_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = debug_job_fault_release,
};

/*
 * Initialize debugfs entry for job fault dump
 */
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
{
	debugfs_create_file("job_fault", S_IRUGO,
			kbdev->mali_debugfs_directory, kbdev,
			&kbasep_debug_job_fault_fops);
}

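/* Per-device initialisation: set up the fault event list, the wait
 * queues, the event lock and the resume workqueue that defers the
 * job-done bottom half while a dump is in progress.
 */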
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
	INIT_LIST_HEAD(&kbdev->job_fault_event_list);

	init_waitqueue_head(&(kbdev->job_fault_wq));
	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
	spin_lock_init(&kbdev->job_fault_event_lock);

	kbdev->job_fault_resume_workq = alloc_workqueue(
			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
	if (!kbdev->job_fault_resume_workq)
		return -ENOMEM;

	kbdev->job_fault_debug = false;

	return 0;
}

/*
 * Release the relevant resource per device
 */
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
	destroy_workqueue(kbdev->job_fault_resume_workq);
}

/*
 * Initialize the relevant data structures per context
 */
void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
{
	/* Allocate double the register range, because this buffer holds
	 * both the register addresses and their values.
	 */
	kctx->reg_dump = vmalloc(0x4000 * 2);
	if (kctx->reg_dump == NULL)
		return;

	if (!kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000)) {
		vfree(kctx->reg_dump);
		kctx->reg_dump = NULL;
	}
	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
	atomic_set(&kctx->job_fault_count, 0);
}

/*
 * Release the relevant resource per context
 */
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
	vfree(kctx->reg_dump);
}

#else /* CONFIG_DEBUG_FS */

int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
	kbdev->job_fault_debug = false;

	return 0;
}

void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
}

#endif /* CONFIG_DEBUG_FS */