// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"
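
/* sort() comparator so the timing samples below can be ordered and trimmed */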
static int wrap_ktime_compare(const void *A, const void *B)
{
        const ktime_t *a = A, *b = B;

        return ktime_compare(*a, *b);
}
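
/*
 * Time ARRAY_SIZE(t) passes of a blitter fill on every copy engine exposed
 * to userspace. The samples are sorted and the throughput is reported from
 * the middle three (with the median counted twice): 4 * size bytes over
 * t[1] + 2 * t[2] + t[3] nanoseconds, scaled to MiB/s, which discards the
 * fastest and slowest outliers.
 */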
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_fill_blt(obj, ce, 0);
                        if (err)
                                break;

                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
                        engine->name,
                        obj->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * obj->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}
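
/* Sweep object sizes from 4K to 64M and measure fill throughput for each */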
static int perf_fill_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *obj;
                int err;

                obj = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                err = __perf_fill_blt(obj);
                i915_gem_object_put(obj);
                if (err)
                        return err;
        }

        return 0;
}
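
/* As __perf_fill_blt(), but timing a blitter copy from src into dst */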
static int __perf_copy_blt(struct drm_i915_gem_object *src,
                           struct drm_i915_gem_object *dst)
{
        struct drm_i915_private *i915 = to_i915(src->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err = 0;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_copy_blt(src, dst, ce);
                        if (err)
                                break;

                        err = i915_gem_object_wait(dst,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
                        engine->name,
                        src->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * src->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}
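
/* Sweep object sizes from 4K to 64M and measure copy throughput for each */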
static int perf_copy_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *src, *dst;
                int err;

                src = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(src))
                        return PTR_ERR(src);

                dst = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_src;
                }

                err = __perf_copy_blt(src, dst);

                i915_gem_object_put(dst);
err_src:
                i915_gem_object_put(src);
                if (err)
                        return err;
        }

        return 0;
}
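
/*
 * Per-thread state handed to each igt_*_blt_thread worker: the engine under
 * test, an optional context shared by all threads (SINGLE_CTX), the open drm
 * file, a private PRNG state and the number of workers (used to scale the
 * per-thread allocation budget).
 */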
struct igt_thread_arg {
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
        struct file *file;
        struct rnd_state prng;
        unsigned int n_cpus;
};
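
/*
 * Worker loop: repeatedly allocate a huge_gem_object of random logical and
 * physical size, scribble a background pattern through the CPU map, fill it
 * with val via the blitter, then verify a sparse sample of dwords.  The size
 * budget doubles each pass (clamped to a fraction of the vm) until the
 * timeout expires.
 */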
static int igt_fill_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *obj;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = I915_USER_PRIORITY(prio);
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                obj = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put;
                }

                /*
                 * Make sure the potentially async clflush does its job, if
                 * required.
                 */
                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(obj) / sizeof(u32));

                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        obj->cache_dirty = true;

                err = i915_gem_object_fill_blt(obj, ce, val);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
                        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(obj);
                i915_gem_object_put(obj);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}
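
/*
 * Copy variant of the worker above: fill src with val on the CPU, scribble
 * dst with the inverse pattern, blit src into dst, then verify that a sparse
 * sample of dst reads back val.
 */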
static int igt_copy_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *src, *dst;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = I915_USER_PRIORITY(prio);
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                src = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(src)) {
                        err = PTR_ERR(src);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_src;
                }

                memset32(vaddr, val,
                         huge_gem_object_phys_size(src) / sizeof(u32));

                i915_gem_object_unpin_map(src);

                if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                        src->cache_dirty = true;

                dst = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_put_src;
                }

                vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_dst;
                }

                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(dst) / sizeof(u32));

                if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        dst->cache_dirty = true;

                err = i915_gem_object_copy_blt(src, dst, ce);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
                        if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(dst);

                i915_gem_object_put(src);
                i915_gem_object_put(dst);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(dst);
err_put_dst:
        i915_gem_object_put(dst);
err_put_src:
        i915_gem_object_put(src);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}
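
/*
 * Spawn one worker per online CPU (plus one), all hammering the same engine.
 * With SINGLE_CTX every worker shares thread[0]'s context; otherwise each
 * worker creates its own context with a random priority.
 */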
static int igt_threaded_blt(struct intel_engine_cs *engine,
                            int (*blt_fn)(void *arg),
                            unsigned int flags)
#define SINGLE_CTX BIT(0)
{
        struct igt_thread_arg *thread;
        struct task_struct **tsk;
        unsigned int n_cpus, i;
        I915_RND_STATE(prng);
        int err = 0;

        n_cpus = num_online_cpus() + 1;

        tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
        if (!tsk)
                return 0;

        thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
        if (!thread)
                goto out_tsk;

        thread[0].file = mock_file(engine->i915);
        if (IS_ERR(thread[0].file)) {
                err = PTR_ERR(thread[0].file);
                goto out_thread;
        }

        if (flags & SINGLE_CTX) {
                thread[0].ctx = live_context_for_engine(engine, thread[0].file);
                if (IS_ERR(thread[0].ctx)) {
                        err = PTR_ERR(thread[0].ctx);
                        goto out_file;
                }
        }

        for (i = 0; i < n_cpus; ++i) {
                thread[i].engine = engine;
                thread[i].file = thread[0].file;
                thread[i].ctx = thread[0].ctx;
                thread[i].n_cpus = n_cpus;
                thread[i].prng =
                        I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

                tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
                if (IS_ERR(tsk[i])) {
                        err = PTR_ERR(tsk[i]);
                        break;
                }

                get_task_struct(tsk[i]);
        }

        yield(); /* start all threads before we kthread_stop() */

        for (i = 0; i < n_cpus; ++i) {
                int status;

                if (IS_ERR_OR_NULL(tsk[i]))
                        continue;

                status = kthread_stop(tsk[i]);
                if (status && !err)
                        err = status;

                put_task_struct(tsk[i]);
        }

out_file:
        fput(thread[0].file);
out_thread:
        kfree(thread);
out_tsk:
        kfree(tsk);
        return err;
}
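
/* Run the given thread function on every copy-class engine exposed to userspace */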
static int test_copy_engines(struct drm_i915_private *i915,
                             int (*fn)(void *arg),
                             unsigned int flags)
{
        struct intel_engine_cs *engine;
        int ret;

        for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
                ret = igt_threaded_blt(engine, fn, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_fill_blt(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}
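
/* Selftest entry points; skip everything if the GT is already wedged */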
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_fill_blt),
                SUBTEST(igt_fill_blt_ctx0),
                SUBTEST(igt_copy_blt),
                SUBTEST(igt_copy_blt_ctx0),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_fill_blt),
                SUBTEST(perf_copy_blt),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}