From 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 13 May 2024 10:30:14 +0000 Subject: [PATCH] modify sin led gpio --- kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1104 +++++++++++++++++++++++++++++++++++---------------------- 1 files changed, 675 insertions(+), 429 deletions(-) diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index bff39f5..195b7e0 100644 --- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -33,17 +33,14 @@ #include "kfd_mqd_manager.h" #include "cik_regs.h" #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, - unsigned int pasid, unsigned int vmid); - -static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, - struct queue *q, - struct qcm_process_device *qpd); + u32 pasid, unsigned int vmid); static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -54,19 +51,20 @@ static int map_queues_cpsch(struct device_queue_manager *dqm); -static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, - struct queue *q, - struct qcm_process_device *qpd); - static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id); + struct queue *q); +static inline void deallocate_hqd(struct device_queue_manager *dqm, + struct queue *q); +static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); +static int allocate_sdma_queue(struct device_queue_manager *dqm, + struct queue *q); static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - if (type == KFD_QUEUE_TYPE_SDMA) + if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } @@ -80,14 +78,14 @@ /* queue is available for KFD usage if bit is 1 */ for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) if (test_bit(pipe_offset + i, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) return true; return false; } -unsigned int get_queues_num(struct device_queue_manager *dqm) +unsigned int get_cp_queues_num(struct device_queue_manager *dqm) { - return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, + return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, KGD_MAX_QUEUES); } @@ -106,10 +104,26 @@ return dqm->dev->device_info->num_sdma_engines; } +static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines; +} + +static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) +{ + return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines - * KFD_SDMA_QUEUES_PER_ENGINE; + * dqm->dev->device_info->num_sdma_queues_per_engine; +} + +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; } void program_sh_mem_settings(struct device_queue_manager *dqm, @@ -123,6 +137,36 @@ qpd->sh_mem_bases); } +static void increment_queue_count(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + dqm->active_queue_count++; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count++; + + if (q->properties.is_gws) { + dqm->gws_queue_count++; + qpd->mapped_gws_queue = true; + } +} + +static void decrement_queue_count(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + dqm->active_queue_count--; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count--; + + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } +} + static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { struct kfd_dev *dev = qpd->dqm->dev; @@ -132,13 +176,20 @@ * preserve the user mode ABI. */ q->doorbell_id = q->properties.queue_id; - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - /* For SDMA queues on SOC15, use static doorbell - * assignments based on the engine and queue. + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + /* For SDMA queues on SOC15 with 8-byte doorbell, use static + * doorbell assignments based on the engine and queue id. + * The doobell index distance between RLC (2*i) and (2*i+1) + * for a SDMA engine is 512. */ - q->doorbell_id = dev->shared_resources.sdma_doorbell - [q->properties.sdma_engine_id] - [q->properties.sdma_queue_id]; + uint32_t *idx_offset = + dev->shared_resources.sdma_doorbell_idx; + + q->doorbell_id = idx_offset[q->properties.sdma_engine_id] + + (q->properties.sdma_queue_id & 1) + * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + + (q->properties.sdma_queue_id >> 1); } else { /* For CP queues on SOC15 reserve a free doorbell ID */ unsigned int found; @@ -154,9 +205,8 @@ } q->properties.doorbell_off = - kfd_doorbell_id_to_offset(dev, q->process, + kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), q->doorbell_id); - return 0; } @@ -167,7 +217,8 @@ struct kfd_dev *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev->device_info->asic_family) || - q->properties.type == KFD_QUEUE_TYPE_SDMA) + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -178,20 +229,30 @@ struct qcm_process_device *qpd, struct queue *q) { - int bit, allocated_vmid; + int allocated_vmid = -1, i; - if (dqm->vmid_bitmap == 0) - return -ENOMEM; + for (i = dqm->dev->vm_info.first_vmid_kfd; + i <= dqm->dev->vm_info.last_vmid_kfd; i++) { + if (!dqm->vmid_pasid[i]) { + allocated_vmid = i; + break; + } + } - bit = ffs(dqm->vmid_bitmap) - 1; - dqm->vmid_bitmap &= ~(1 << bit); + if (allocated_vmid < 0) { + pr_err("no more vmid to allocate\n"); + return -ENOSPC; + } - allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; - pr_debug("vmid allocation %d\n", allocated_vmid); + pr_debug("vmid allocated: %d\n", allocated_vmid); + + dqm->vmid_pasid[allocated_vmid] = q->process->pasid; + + set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); + qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; - set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); program_sh_mem_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() @@ -202,6 +263,10 @@ qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd)); + + if (dqm->dev->kfd2kgd->set_scratch_backing_va) + dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, + qpd->sh_hidden_private_base, qpd->vmid); return 0; } @@ -219,7 +284,7 @@ if (ret) return ret; - return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, qpd->ib_base, (uint32_t *)qpd->ib_kaddr, pmf->release_mem_size / sizeof(uint32_t)); } @@ -228,8 +293,6 @@ struct qcm_process_device *qpd, struct queue *q) { - int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; - /* On GFX v7, CP doesn't flush TC at dequeue */ if (q->device->device_info->asic_family == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) @@ -239,8 +302,8 @@ /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + dqm->vmid_pasid[qpd->vmid] = 0; - dqm->vmid_bitmap |= (1 << bit); qpd->vmid = 0; q->properties.vmid = 0; } @@ -249,9 +312,8 @@ struct queue *q, struct qcm_process_device *qpd) { + struct mqd_manager *mqd_mgr; int retval; - - print_queue(q); dqm_lock(dqm); @@ -269,37 +331,67 @@ } q->properties.vmid = qpd->vmid; /* - * Eviction state logic: we only mark active queues as evicted - * to avoid the overhead of restoring inactive queues later + * Eviction state logic: mark all queues as evicted, even ones + * not currently active. Restoring inactive queues later only + * updates the is_evicted flag but is a no-op otherwise. */ - if (qpd->evicted) - q->properties.is_evicted = (q->properties.queue_size > 0 && - q->properties.queue_percent > 0 && - q->properties.queue_address != 0); + q->properties.is_evicted = !!qpd->evicted; q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) - retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - retval = create_sdma_queue_nocpsch(dqm, q, qpd); - else - retval = -EINVAL; - - if (retval) { - if (list_empty(&qpd->queues_list)) - deallocate_vmid(dqm, qpd, q); - goto out_unlock; + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + retval = allocate_hqd(dqm, q); + if (retval) + goto deallocate_vmid; + pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", + q->pipe, q->queue); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + retval = allocate_sdma_queue(dqm, q); + if (retval) + goto deallocate_vmid; + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); } + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_hqd; + + /* Temporarily release dqm lock to avoid a circular lock dependency */ + dqm_unlock(dqm); + q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); + dqm_lock(dqm); + + if (!q->mqd_mem_obj) { + retval = -ENOMEM; + goto out_deallocate_doorbell; + } + mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (q->properties.is_active) { + if (!dqm->sched_running) { + WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); + goto add_queue_to_list; + } + + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; + else + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, + q->queue, &q->properties, current->mm); + if (retval) + goto out_free_mqd; + } + +add_queue_to_list: list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) - dqm->queue_count++; - - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; + increment_queue_count(dqm, qpd, q); /* * Unconditionally increment this counter, regardless of the queue's @@ -308,7 +400,21 @@ dqm->total_queue_count++; pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); + goto out_unlock; +out_free_mqd: + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); +out_deallocate_hqd: + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + deallocate_hqd(dqm, q); + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); +deallocate_vmid: + if (list_empty(&qpd->queues_list)) + deallocate_vmid(dqm, qpd, q); out_unlock: dqm_unlock(dqm); return retval; @@ -354,60 +460,6 @@ dqm->allocated_queues[q->pipe] |= (1 << q->queue); } -static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, - struct queue *q, - struct qcm_process_device *qpd) -{ - struct mqd_manager *mqd_mgr; - int retval; - - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) - return -ENOMEM; - - retval = allocate_hqd(dqm, q); - if (retval) - return retval; - - retval = allocate_doorbell(qpd, q); - if (retval) - goto out_deallocate_hqd; - - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_doorbell; - - pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", - q->pipe, q->queue); - - dqm->dev->kfd2kgd->set_scratch_backing_va( - dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); - - if (!q->properties.is_active) - return 0; - - if (WARN(q->process->mm != current->mm, - "should only run in user thread")) - retval = -EFAULT; - else - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, - &q->properties, current->mm); - if (retval) - goto out_uninit_mqd; - - return 0; - -out_uninit_mqd: - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); -out_deallocate_doorbell: - deallocate_doorbell(qpd, q); -out_deallocate_hqd: - deallocate_hqd(dqm, q); - - return retval; -} - /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked * to avoid asynchronized access */ @@ -418,17 +470,16 @@ int retval; struct mqd_manager *mqd_mgr; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); - } else { + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + deallocate_sdma_queue(dqm, q); + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); + else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); return -EINVAL; @@ -437,14 +488,17 @@ deallocate_doorbell(qpd, q); + if (!dqm->sched_running) { + WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); + return 0; + } + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval == -ETIME) qpd->reset_wavefronts = true; - - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); list_del(&q->list); if (list_empty(&qpd->queues_list)) { @@ -463,7 +517,7 @@ } qpd->queue_count--; if (q->properties.is_active) - dqm->queue_count--; + decrement_queue_count(dqm, qpd, q); return retval; } @@ -473,17 +527,35 @@ struct queue *q) { int retval; + uint64_t sdma_val = 0; + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + struct mqd_manager *mqd_mgr = + dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; + + /* Get the SDMA queue stats */ + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, + &sdma_val); + if (retval) + pr_err("Failed to read SDMA queue counter for queue: %d\n", + q->properties.queue_id); + } dqm_lock(dqm); retval = destroy_queue_nocpsch_locked(dqm, qpd, q); + if (!retval) + pdd->sdma_past_activity_counter += sdma_val; dqm_unlock(dqm); + + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); return retval; } static int update_queue(struct device_queue_manager *dqm, struct queue *q) { - int retval; + int retval = 0; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; bool prev_active = false; @@ -494,20 +566,8 @@ retval = -ENODEV; goto out_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_unlock; - } - /* - * Eviction state logic: we only mark active queues as evicted - * to avoid the overhead of restoring inactive queues later - */ - if (pdd->qpd.evicted) - q->properties.is_evicted = (q->properties.queue_size > 0 && - q->properties.queue_percent > 0 && - q->properties.queue_address != 0); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* Save previous activity state for counters */ prev_active = q->properties.is_active; @@ -522,7 +582,14 @@ } } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + + if (!dqm->sched_running) { + WARN_ONCE(1, "Update non-HWS queue while stopped\n"); + goto out_unlock; + } + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -532,24 +599,38 @@ } } - retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); + mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); /* * check active state vs. the previous state and modify * counter accordingly. map_queues_cpsch uses the - * dqm->queue_count to determine whether a new runlist must be + * dqm->active_queue_count to determine whether a new runlist must be * uploaded. */ - if (q->properties.is_active && !prev_active) - dqm->queue_count++; - else if (!q->properties.is_active && prev_active) - dqm->queue_count--; + if (q->properties.is_active && !prev_active) { + increment_queue_count(dqm, &pdd->qpd, q); + } else if (!q->properties.is_active && prev_active) { + decrement_queue_count(dqm, &pdd->qpd, q); + } else if (q->gws && !q->properties.is_gws) { + if (q->properties.is_active) { + dqm->gws_queue_count++; + pdd->qpd.mapped_gws_queue = true; + } + q->properties.is_gws = true; + } else if (!q->gws && q->properties.is_gws) { + if (q->properties.is_active) { + dqm->gws_queue_count--; + pdd->qpd.mapped_gws_queue = false; + } + q->properties.is_gws = false; + } if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { if (WARN(q->process->mm != current->mm, "should only run in user thread")) retval = -EFAULT; @@ -564,67 +645,52 @@ return retval; } -static struct mqd_manager *get_mqd_manager( - struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) -{ - struct mqd_manager *mqd_mgr; - - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; - - pr_debug("mqd type %d\n", type); - - mqd_mgr = dqm->mqd_mgrs[type]; - if (!mqd_mgr) { - mqd_mgr = mqd_manager_init(type, dqm->dev); - if (!mqd_mgr) - pr_err("mqd manager is NULL"); - dqm->mqd_mgrs[type] = mqd_mgr; - } - - return mqd_mgr; -} - static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct queue *q; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; - int retval = 0; + int retval, ret = 0; dqm_lock(dqm); if (qpd->evicted++ > 0) /* already evicted, do nothing */ goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID %u queues\n", + pr_debug_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); - /* unactivate all active queues on the qpd */ + pdd->last_evict_timestamp = get_jiffies_64(); + /* Mark all queues as evicted. Deactivate all active queues on + * the qpd. + */ list_for_each_entry(q, &qpd->queues_list, list) { + q->properties.is_evicted = true; if (!q->properties.is_active) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot evict queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } - q->properties.is_evicted = true; + + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) + continue; + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); - if (retval) - goto out; - dqm->queue_count--; + if (retval && !ret) + /* Return the first error, but keep going to + * maintain a consistent eviction state + */ + ret = retval; } out: dqm_unlock(dqm); - return retval; + return ret; } static int evict_process_queues_cpsch(struct device_queue_manager *dqm, @@ -639,17 +705,21 @@ goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID %u queues\n", + pr_debug_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); - /* unactivate all active queues on the qpd */ + /* Mark all queues as evicted. Deactivate all active queues on + * the qpd. + */ list_for_each_entry(q, &qpd->queues_list, list) { + q->properties.is_evicted = true; if (!q->properties.is_active) continue; - q->properties.is_evicted = true; + q->properties.is_active = false; - dqm->queue_count--; + decrement_queue_count(dqm, qpd, q); } + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : @@ -667,12 +737,13 @@ struct queue *q; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; - uint32_t pd_base; - int retval = 0; + uint64_t pd_base; + uint64_t eviction_duration; + int retval, ret = 0; pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -682,12 +753,12 @@ goto out; } - pr_info_ratelimited("Restoring PASID %u queues\n", + pr_debug_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%08x\n", pd_base); + pr_debug("Updated PD address to 0x%llx\n", pd_base); if (!list_empty(&qpd->queues_list)) { dqm->dev->kfd2kgd->set_vm_context_page_table_base( @@ -702,35 +773,42 @@ */ mm = get_task_mm(pdd->process->lead_thread); if (!mm) { - retval = -EFAULT; + ret = -EFAULT; goto out; } - /* activate all active queues on the qpd */ + /* Remove the eviction flags. Activate queues that are not + * inactive for other reasons. + */ list_for_each_entry(q, &qpd->queues_list, list) { - if (!q->properties.is_evicted) - continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot restore queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } q->properties.is_evicted = false; + if (!QUEUE_IS_ACTIVE(q->properties)) + continue; + + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_active = true; + increment_queue_count(dqm, qpd, q); + + if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) + continue; + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, mm); - if (retval) - goto out; - dqm->queue_count++; + if (retval && !ret) + /* Return the first error, but keep going to + * maintain a consistent eviction state + */ + ret = retval; } qpd->evicted = 0; + eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; + atomic64_add(eviction_duration, &pdd->evict_duration_counter); out: if (mm) mmput(mm); dqm_unlock(dqm); - return retval; + return ret; } static int restore_process_queues_cpsch(struct device_queue_manager *dqm, @@ -738,12 +816,13 @@ { struct queue *q; struct kfd_process_device *pdd; - uint32_t pd_base; + uint64_t pd_base; + uint64_t eviction_duration; int retval = 0; pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -753,25 +832,27 @@ goto out; } - pr_info_ratelimited("Restoring PASID %u queues\n", + pr_debug_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%08x\n", pd_base); + pr_debug("Updated PD address to 0x%llx\n", pd_base); /* activate all active queues on the qpd */ list_for_each_entry(q, &qpd->queues_list, list) { - if (!q->properties.is_evicted) - continue; q->properties.is_evicted = false; + if (!QUEUE_IS_ACTIVE(q->properties)) + continue; + q->properties.is_active = true; - dqm->queue_count++; + increment_queue_count(dqm, &pdd->qpd, q); } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - if (!retval) - qpd->evicted = 0; + qpd->evicted = 0; + eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; + atomic64_add(eviction_duration, &pdd->evict_duration_counter); out: dqm_unlock(dqm); return retval; @@ -782,7 +863,7 @@ { struct device_process_node *n; struct kfd_process_device *pdd; - uint32_t pd_base; + uint64_t pd_base; int retval; n = kzalloc(sizeof(*n), GFP_KERNEL); @@ -793,20 +874,25 @@ pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); list_add(&n->list, &dqm->queues); /* Update PD Base in QPD */ qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%llx\n", pd_base); retval = dqm->asic_ops.update_qpd(dqm, qpd); - if (dqm->processes_count++ == 0) - dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false); + dqm->processes_count++; dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + kfd_inc_compute_active(dqm->dev); return retval; } @@ -827,9 +913,7 @@ if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - if (--dqm->processes_count == 0) - dqm->dev->kfd2kgd->set_compute_idle( - dqm->dev->kgd, true); + dqm->processes_count--; goto out; } } @@ -837,22 +921,22 @@ retval = 1; out: dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (!retval) + kfd_dec_compute_active(dqm->dev); + return retval; } static int -set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, +set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid) { - uint32_t pasid_mapping; - - pasid_mapping = (pasid == 0) ? 0 : - (uint32_t)pasid | - ATC_VMID_PASID_MAPPING_VALID; - return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( - dqm->dev->kgd, pasid_mapping, - vmid); + dqm->dev->kgd, pasid, vmid); } static void init_interrupts(struct device_queue_manager *dqm) @@ -877,20 +961,23 @@ mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); - dqm->queue_count = dqm->next_pipe_to_allocate = 0; - dqm->sdma_queue_count = 0; + dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; + dqm->active_cp_queue_count = 0; + dqm->gws_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) if (test_bit(pipe_offset + queue, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) dqm->allocated_queues[pipe] |= 1 << queue; } - dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); + + dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); + dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); return 0; } @@ -899,97 +986,98 @@ { int i; - WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqd_mgrs[i]); mutex_destroy(&dqm->lock_hidden); - kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); - return pm_init(&dqm->packets, dqm); + + if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + return pm_init(&dqm->packets, dqm); + dqm->sched_running = true; + + return 0; } static int stop_nocpsch(struct device_queue_manager *dqm) { - pm_uninit(&dqm->packets); + if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + pm_uninit(&dqm->packets, false); + dqm->sched_running = false; + return 0; } +static void pre_reset(struct device_queue_manager *dqm) +{ + dqm_lock(dqm); + dqm->is_resetting = true; + dqm_unlock(dqm); +} + static int allocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int *sdma_queue_id) + struct queue *q) { int bit; - if (dqm->sdma_bitmap == 0) - return -ENOMEM; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (dqm->sdma_bitmap == 0) { + pr_err("No more SDMA queue to allocate\n"); + return -ENOMEM; + } - bit = ffs(dqm->sdma_bitmap) - 1; - dqm->sdma_bitmap &= ~(1 << bit); - *sdma_queue_id = bit; + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + q->properties.sdma_engine_id = q->sdma_id % + get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_sdma_engines(dqm); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (dqm->xgmi_sdma_bitmap == 0) { + pr_err("No more XGMI SDMA queue to allocate\n"); + return -ENOMEM; + } + bit = __ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + /* sdma_engine_id is sdma id including + * both PCIe-optimized SDMAs and XGMI- + * optimized SDMAs. The calculation below + * assumes the first N engines are always + * PCIe-optimized ones + */ + q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + + q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_xgmi_sdma_engines(dqm); + } + + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); return 0; } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id) + struct queue *q) { - if (sdma_queue_id >= get_num_sdma_queues(dqm)) - return; - dqm->sdma_bitmap |= (1 << sdma_queue_id); -} - -static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, - struct queue *q, - struct qcm_process_device *qpd) -{ - struct mqd_manager *mqd_mgr; - int retval; - - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd_mgr) - return -ENOMEM; - - retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval) - return retval; - - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - - retval = allocate_doorbell(qpd, q); - if (retval) - goto out_deallocate_sdma_queue; - - pr_debug("SDMA id is: %d\n", q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_doorbell; - - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties, - NULL); - if (retval) - goto out_uninit_mqd; - - return 0; - -out_uninit_mqd: - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); -out_deallocate_doorbell: - deallocate_doorbell(qpd, q); -out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); - - return retval; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->sdma_id >= get_num_sdma_queues(dqm)) + return; + dqm->sdma_bitmap |= (1ULL << q->sdma_id); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return; + dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + } } /* @@ -1008,7 +1096,7 @@ mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) / dqm->dev->shared_resources.num_pipe_per_mec; - if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap)) + if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) continue; /* only acquire queues from the first MEC */ @@ -1024,10 +1112,12 @@ break; } - res.queue_mask |= (1ull << i); + res.queue_mask |= 1ull + << amdgpu_queue_mask_bit_to_set_resource_bit( + (struct amdgpu_device *)dqm->dev->kgd, i); } - res.gws_mask = res.oac_mask = res.gds_heap_base = - res.gds_heap_size = 0; + res.gws_mask = ~0ull; + res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; pr_debug("Scheduling resources:\n" "vmid mask: 0x%8X\n" @@ -1039,14 +1129,29 @@ static int initialize_cpsch(struct device_queue_manager *dqm) { + uint64_t num_sdma_queues; + uint64_t num_xgmi_sdma_queues; + pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); - dqm->queue_count = dqm->processes_count = 0; - dqm->sdma_queue_count = 0; + dqm->active_queue_count = dqm->processes_count = 0; + dqm->active_cp_queue_count = 0; + dqm->gws_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + + num_sdma_queues = get_num_sdma_queues(dqm); + if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) + dqm->sdma_bitmap = ULLONG_MAX; + else + dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); + + num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); + if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) + dqm->xgmi_sdma_bitmap = ULLONG_MAX; + else + dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1076,7 +1181,7 @@ if (retval) goto fail_allocate_vidmem; - dqm->fence_addr = dqm->fence_mem->cpu_ptr; + dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; init_interrupts(dqm); @@ -1084,27 +1189,39 @@ dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; + dqm->is_resetting = false; + dqm->sched_running = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, false); fail_packet_manager_init: return retval; } static int stop_cpsch(struct device_queue_manager *dqm) { + bool hanging; + dqm_lock(dqm); - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + if (!dqm->is_hws_hang) + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + hanging = dqm->is_hws_hang || dqm->is_resetting; + dqm->sched_running = false; dqm_unlock(dqm); pm_release_ib(&dqm->packets); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, hanging); return 0; } @@ -1130,7 +1247,7 @@ dqm->total_queue_count); list_add(&kq->list, &qpd->priv_queue_list); - dqm->queue_count++; + increment_queue_count(dqm, qpd, kq->queue); qpd->is_debug = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1144,7 +1261,7 @@ { dqm_lock(dqm); list_del(&kq->list); - dqm->queue_count--; + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); /* @@ -1163,66 +1280,60 @@ int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm_lock(dqm); + retval = allocate_sdma_queue(dqm, q); + dqm_unlock(dqm); if (retval) - goto out_unlock; - q->properties.sdma_queue_id = - q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = - q->sdma_id % get_num_sdma_engines(dqm); + goto out; } retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; - if (!mqd_mgr) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); + if (!q->mqd_mem_obj) { retval = -ENOMEM; goto out_deallocate_doorbell; } + + dqm_lock(dqm); /* - * Eviction state logic: we only mark active queues as evicted - * to avoid the overhead of restoring inactive queues later + * Eviction state logic: mark all queues as evicted, even ones + * not currently active. Restoring inactive queues later only + * updates the is_evicted flag but is a no-op otherwise. */ - if (qpd->evicted) - q->properties.is_evicted = (q->properties.queue_size > 0 && - q->properties.queue_percent > 0 && - q->properties.queue_address != 0); - - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - - q->properties.tba_addr = qpd->tba_addr; - q->properties.tma_addr = qpd->tma_addr; - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, + q->properties.is_evicted = !!qpd->evicted; + mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; + if (q->properties.is_active) { - dqm->queue_count++; - retval = execute_queues_cpsch(dqm, + increment_queue_count(dqm, qpd, q); + + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -1238,16 +1349,18 @@ out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm_lock(dqm); + deallocate_sdma_queue(dqm, q); + dqm_unlock(dqm); + } +out: return retval; } -int amdkfd_fence_wait_timeout(unsigned int *fence_addr, - unsigned int fence_value, +int amdkfd_fence_wait_timeout(uint64_t *fence_addr, + uint64_t fence_value, unsigned int timeout_ms) { unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; @@ -1270,31 +1383,20 @@ return 0; } -static int unmap_sdma_queues(struct device_queue_manager *dqm) -{ - int i, retval = 0; - - for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) { - retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); - if (retval) - return retval; - } - return retval; -} - /* dqm->lock mutex has to be locked before calling this function */ static int map_queues_cpsch(struct device_queue_manager *dqm) { int retval; - if (dqm->queue_count <= 0 || dqm->processes_count <= 0) + if (!dqm->sched_running) return 0; - + if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) + return 0; if (dqm->active_runlist) return 0; retval = pm_send_runlist(&dqm->packets, &dqm->queues); + pr_debug("%s sent runlist\n", __func__); if (retval) { pr_err("failed to execute runlist\n"); return retval; @@ -1311,16 +1413,12 @@ { int retval = 0; + if (!dqm->sched_running) + return 0; if (dqm->is_hws_hang) return -EIO; if (!dqm->active_runlist) return retval; - - pr_debug("Before destroying queues, sdma queue count is : %u\n", - dqm->sdma_queue_count); - - if (dqm->sdma_queue_count > 0) - unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); @@ -1332,9 +1430,18 @@ KFD_FENCE_COMPLETED); /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, - QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval) + queue_preemption_timeout_ms); + if (retval) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + /* It's possible we're detecting a HWS hang in the + * middle of a GPU reset. No need to schedule another + * reset in this case. + */ + if (!dqm->is_resetting) + schedule_work(&dqm->hw_exception_work); return retval; + } pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1352,12 +1459,8 @@ if (dqm->is_hws_hang) return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); - if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); - dqm->is_hws_hang = true; - schedule_work(&dqm->hw_exception_work); + if (retval) return retval; - } return map_queues_cpsch(dqm); } @@ -1368,9 +1471,18 @@ { int retval; struct mqd_manager *mqd_mgr; - bool preempt_all_queues; + uint64_t sdma_val = 0; + struct kfd_process_device *pdd = qpd_to_pdd(qpd); - preempt_all_queues = false; + /* Get the SDMA queue stats */ + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, + &sdma_val); + if (retval) + pr_err("Failed to read SDMA queue counter for queue: %d\n", + q->properties.queue_id); + } retval = 0; @@ -1387,31 +1499,26 @@ } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto failed; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; deallocate_doorbell(qpd, q); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + deallocate_sdma_queue(dqm, q); + pdd->sdma_past_activity_counter += sdma_val; } list_del(&q->list); qpd->queue_count--; if (q->properties.is_active) { - dqm->queue_count--; + decrement_queue_count(dqm, qpd, q); retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval == -ETIME) qpd->reset_wavefronts = true; } - - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); /* * Unconditionally decrement this counter, regardless of the queue's @@ -1423,9 +1530,11 @@ dqm_unlock(dqm); + /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; -failed: failed_try_destroy_debugged_queue: dqm_unlock(dqm); @@ -1525,19 +1634,27 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct queue *q, *next; + struct queue *q; struct device_process_node *cur, *next_dpn; int retval = 0; + bool found = false; dqm_lock(dqm); /* Clear all user mode queues */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + while (!list_empty(&qpd->queues_list)) { + struct mqd_manager *mqd_mgr; int ret; + q = list_first_entry(&qpd->queues_list, struct queue, list); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; ret = destroy_queue_nocpsch_locked(dqm, qpd, q); if (ret) retval = ret; + dqm_unlock(dqm); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); } /* Unregister process */ @@ -1546,25 +1663,63 @@ list_del(&cur->list); kfree(cur); dqm->processes_count--; + found = true; break; } } dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + return retval; } +static int get_wave_state(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct mqd_manager *mqd_mgr; + + dqm_lock(dqm); + + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; + + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled || + !mqd_mgr->get_wave_state) { + dqm_unlock(dqm); + return -EINVAL; + } + + dqm_unlock(dqm); + + /* + * get_wave_state is outside the dqm lock to prevent circular locking + * and the queue should be protected against destruction by the process + * lock. + */ + return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, + ctl_stack_used_size, save_area_used_size); +} static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; - struct queue *q, *next; + struct queue *q; struct kernel_queue *kq, *kq_next; struct mqd_manager *mqd_mgr; struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + bool found = false; retval = 0; @@ -1573,7 +1728,7 @@ /* Clean all kernel queues */ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { list_del(&kq->list); - dqm->queue_count--; + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; dqm->total_queue_count--; filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; @@ -1581,13 +1736,13 @@ /* Clear all user mode queues */ list_for_each_entry(q, &qpd->queues_list, list) { - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); - } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + deallocate_sdma_queue(dqm, q); + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); if (q->properties.is_active) - dqm->queue_count--; + decrement_queue_count(dqm, qpd, q); dqm->total_queue_count--; } @@ -1598,6 +1753,7 @@ list_del(&cur->list); kfree(cur); dqm->processes_count--; + found = true; break; } } @@ -1609,21 +1765,70 @@ qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out; - } + /* Lastly, free mqd resources. + * Do free_mqd() after dqm_unlock to avoid circular locking. + */ + while (!list_empty(&qpd->queues_list)) { + q = list_first_entry(&qpd->queues_list, struct queue, list); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; list_del(&q->list); qpd->queue_count--; - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_unlock(dqm); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); + } + dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + + return retval; +} + +static int init_mqd_managers(struct device_queue_manager *dqm) +{ + int i, j; + struct mqd_manager *mqd_mgr; + + for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { + mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); + if (!mqd_mgr) { + pr_err("mqd manager [%d] initialization failed\n", i); + goto out_free; + } + dqm->mqd_mgrs[i] = mqd_mgr; } -out: - dqm_unlock(dqm); + return 0; + +out_free: + for (j = 0; j < i; j++) { + kfree(dqm->mqd_mgrs[j]); + dqm->mqd_mgrs[j] = NULL; + } + + return -ENOMEM; +} + +/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ +static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) +{ + int retval; + struct kfd_dev *dev = dqm->dev; + struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; + uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * + get_num_all_sdma_engines(dqm) * + dev->device_info->num_sdma_queues_per_engine + + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), + (void *)&(mem_obj->cpu_ptr), false); + return retval; } @@ -1662,9 +1867,9 @@ dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.uninitialize = uninitialize; @@ -1675,15 +1880,16 @@ dqm->ops.process_termination = process_termination_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; + dqm->ops.get_wave_state = get_wave_state; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ dqm->ops.start = start_nocpsch; dqm->ops.stop = stop_nocpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = initialize_nocpsch; @@ -1694,6 +1900,7 @@ dqm->ops.evict_process_queues = evict_process_queues_nocpsch; dqm->ops.restore_process_queues = restore_process_queues_nocpsch; + dqm->ops.get_wave_state = get_wave_state; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); @@ -1717,16 +1924,37 @@ case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: + case CHIP_ARCTURUS: device_queue_manager_init_v9(&dqm->asic_ops); + break; + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); + goto out_free; + } + + if (init_mqd_managers(dqm)) + goto out_free; + + if (allocate_hiq_sdma_mqd(dqm)) { + pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); goto out_free; } @@ -1738,14 +1966,22 @@ return NULL; } +static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, + struct kfd_mem_obj *mqd) +{ + WARN(!mqd, "No hiq sdma mqd trunk to free"); + + amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +} + void device_queue_manager_uninit(struct device_queue_manager *dqm) { dqm->ops.uninitialize(dqm); + deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); kfree(dqm); } -int kfd_process_vm_fault(struct device_queue_manager *dqm, - unsigned int pasid) +int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -1753,6 +1989,7 @@ if (!p) return -EINVAL; + WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); if (pdd) ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); @@ -1765,7 +2002,7 @@ { struct device_queue_manager *dqm = container_of(work, struct device_queue_manager, hw_exception_work); - dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); + amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); } #if defined(CONFIG_DEBUG_FS) @@ -1798,13 +2035,20 @@ int pipe, queue; int r = 0; + if (!dqm->sched_running) { + seq_printf(m, " Device is stopped\n"); + + return 0; + } + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, - KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); + KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, + &dump, &n_regs); if (!r) { seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", - KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, - KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), - KFD_CIK_HIQ_QUEUE); + KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, + KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); seq_reg_dump(m, dump, n_regs); kfree(dump); @@ -1815,7 +2059,7 @@ for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { if (!test_bit(pipe_offset + queue, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) continue; r = dqm->dev->kfd2kgd->hqd_dump( @@ -1831,8 +2075,10 @@ } } - for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { - for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { + for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { + for (queue = 0; + queue < dqm->dev->device_info->num_sdma_queues_per_engine; + queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( dqm->dev->kgd, pipe, queue, &dump, &n_regs); if (r) -- Gitblit v1.6.2