/*
 *
 * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */


/*
 * Base kernel affinity manager APIs
 */

#include <mali_kbase.h>
#include "mali_kbase_js_affinity.h"
#include "mali_kbase_hw.h"

#include <backend/gpu/mali_kbase_pm_internal.h>

bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
						int js)
{
	/*
	 * Here are the reasons for using job slot 2:
	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
	 * - In absence of the above, then:
	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
	 *  - But, only when there aren't contexts with
	 *    KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run
	 *    on all cores on slot 1 could be blocked by those using a coherent
	 *    group on slot 2
	 *  - And, only when there are 2 or more coregroups - with only 1
	 *    coregroup, any job eligible for slot 2 would also be eligible for
	 *    slot 1, so the two slots would interfere with each other. Jobs
	 *    able to run on slot 2 could also block jobs that can only run on
	 *    slot 1 (tiler jobs)
	 */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
		return true;

	if (js != 2)
		return true;

	/* Only deal with js==2 now: */
	if (kbdev->gpu_props.num_core_groups > 1) {
		/* Only use slot 2 in the 2+ coregroup case */
		if (!kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
				KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES)) {
			/* ...But only when we *don't* have atoms that run on
			 * all cores */

			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
			 * atoms - the policy will sort that out */
			return true;
		}
	}

	/* All of the above checks failed, so slot 2 should not be used */
	return false;
}
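
/*
 * A minimal caller sketch, compiled out with #if 0: it shows how the check
 * above might be used to find the first job slot an atom could be submitted
 * to. The helper name and the slot count of 3 are assumptions for
 * illustration, not part of the driver; the caller is assumed to hold the
 * scheduler lock implied by the _no_lock suffix.
 */
#if 0
static int example_first_usable_slot(struct kbase_device *kbdev)
{
	int js;

	for (js = 0; js < 3; ++js)
		if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js))
			return js;

	return -1; /* no slot currently usable */
}
#endif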

/*
 * Until the planned deeper rework of what the job scheduler, power
 * manager and affinity manager will implement lands, this function is
 * just an intermediate step that assumes:
 * - all working cores will be powered on when this is called.
 * - the largest current configuration is 2 core groups.
 * - it has been decided not to have hardcoded values, so the low and
 *   high cores in a core split will be evenly distributed.
 * - odd combinations of core requirements have been filtered out and
 *   do not get to this function (e.g. CS+T+NSS is not supported here).
 * - this function is frequently called and can be optimized (see notes
 *   in the loops), but as the functionality will likely be modified,
 *   optimization has not been addressed.
 */
bool kbase_js_choose_affinity(u64 * const affinity,
				struct kbase_device *kbdev,
				struct kbase_jd_atom *katom, int js)
{
	base_jd_core_req core_req = katom->core_req;
	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
	u64 core_availability_mask;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);

	/*
	 * If no cores are currently available (core availability policy is
	 * transitioning) then fail.
	 */
	if (core_availability_mask == 0) {
		*affinity = 0;
		return false;
	}

	KBASE_DEBUG_ASSERT(js >= 0);

	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
								BASE_JD_REQ_T) {
		/* If the hardware supports XAFFINITY then we'll only enable
		 * the tiler (which is the default so this is a no-op),
		 * otherwise enable shader core 0. */
		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
			*affinity = 1;
		else
			*affinity = 0;

		return true;
	}

	if (kbdev->gpu_props.num_cores == 1) {
		/* Trivial case: only one core, nothing to choose */
		*affinity = core_availability_mask &
				kbdev->pm.debug_core_mask[js];
	} else {
		if (core_req & (BASE_JD_REQ_COHERENT_GROUP |
				BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) {
			if (js == 0 || num_core_groups == 1) {
				/* js[0] and single-core-group systems just get
				 * the first core group */
				*affinity =
				kbdev->gpu_props.props.coherency_info.group[0].core_mask
					& core_availability_mask &
					kbdev->pm.debug_core_mask[js];
			} else {
				/* js[1], js[2] use core groups 0, 1 for
				 * dual-core-group systems */
				u32 core_group_idx = ((u32) js) - 1;

				KBASE_DEBUG_ASSERT(core_group_idx <
							num_core_groups);
				*affinity =
				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
					& core_availability_mask &
					kbdev->pm.debug_core_mask[js];

				/* If the job is specifically targeting core
				 * group 1 and the core availability policy is
				 * keeping that core group off, then fail */
				if (*affinity == 0 && core_group_idx == 1 &&
						kbdev->pm.backend.cg1_disabled)
					katom->event_code =
							BASE_JD_EVENT_PM_EVENT;
			}
		} else {
			/* All cores are available when no core split is
			 * required */
			*affinity = core_availability_mask &
					kbdev->pm.debug_core_mask[js];
		}
	}

	/*
	 * If no cores are currently available in the desired core group(s)
	 * (core availability policy is transitioning) then fail.
	 */
	if (*affinity == 0)
		return false;

	/* Enable core 0 if the tiler is required on hardware without
	 * XAFFINITY support (see notes above) */
	if (core_req & BASE_JD_REQ_T) {
		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
			*affinity = *affinity | 1;
	}

	return true;
}
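
/*
 * A minimal caller sketch, compiled out with #if 0: choosing an affinity for
 * an atom and backing off while the availability policy is transitioning.
 * The helper name and the submission step are illustrative assumptions; only
 * kbase_js_choose_affinity() itself is real driver code.
 */
#if 0
static bool example_choose_and_submit(struct kbase_device *kbdev,
					struct kbase_jd_atom *katom, int js)
{
	u64 affinity;

	/* kbdev->hwaccess_lock must be held, as asserted in the callee */
	if (!kbase_js_choose_affinity(&affinity, kbdev, katom, js))
		return false; /* no usable cores yet; retry later */

	/* ... program the chosen affinity and submit the atom ... */
	return true;
}
#endif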

static inline bool kbase_js_affinity_is_violating(
					struct kbase_device *kbdev,
					u64 *affinities)
{
	/* This implementation checks whether the two slots involved in
	 * Generic thread creation have intersecting affinity. This is due to
	 * micro-architectural issues where a job in slot A targeting cores
	 * used by slot B could prevent the job in slot B from making progress
	 * until the job in slot A has completed.
	 */
	u64 affinity_set_left;
	u64 affinity_set_right;
	u64 intersection;

	KBASE_DEBUG_ASSERT(affinities != NULL);

	affinity_set_left = affinities[1];

	affinity_set_right = affinities[2];

	/* A violation occurs when any bit in the left_set is also in the
	 * right_set */
	intersection = affinity_set_left & affinity_set_right;

	return intersection != 0;
}
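
/*
 * Worked example (illustrative values only): with slot 1 holding cores 0-3
 * (affinities[1] = 0x0F) and slot 2 holding cores 4-7 (affinities[2] = 0xF0),
 * the intersection is 0 and no violation is reported. If slot 2 instead held
 * 0x3C, the overlap 0x0C would be flagged as a violation.
 */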

bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
					u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
			sizeof(js_devdata->runpool_irq.slot_affinities));

	new_affinities[js] |= affinity;

	return kbase_js_affinity_is_violating(kbdev, new_affinities);
}

void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
					u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
								affinity));

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		cnt = ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (cnt == 1)
			js_devdata->runpool_irq.slot_affinities[js] |= bit;

		cores &= ~bit;
	}
}

void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
					u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 cores;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	cores = affinity;
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;
		s8 cnt;

		KBASE_DEBUG_ASSERT(
			js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);

		cnt = --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);

		if (cnt == 0)
			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;

		cores &= ~bit;
	}
}
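
/*
 * A minimal sketch of the intended pairing, compiled out with #if 0: check
 * for a violation, retain the cores for the atom's lifetime on the slot,
 * then release them on completion. The helper name and the "atom runs" step
 * are illustrative assumptions, not part of the driver.
 */
#if 0
static bool example_track_atom_affinity(struct kbase_device *kbdev, int js,
					u64 affinity)
{
	/* Refuse work that would make the tracked slot affinities overlap */
	if (kbase_js_affinity_would_violate(kbdev, js, affinity))
		return false;

	kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);

	/* ... the atom runs on slot 'js' ... */

	/* Every retain must be balanced by a release once the atom is done */
	kbase_js_affinity_release_slot_cores(kbdev, js, affinity);
	return true;
}
#endif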

#if KBASE_TRACE_ENABLE
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
	struct kbasep_js_device_data *js_devdata;
	int slot_nr;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	js_devdata = &kbdev->js_data;

	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
				NULL, 0u, slot_nr,
				(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
}
#endif /* KBASE_TRACE_ENABLE */