/*
|
* Copyright 2016 Red Hat.
|
*
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
* copy of this software and associated documentation files (the "Software"),
|
* to deal in the Software without restriction, including without limitation
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
* the Software is furnished to do so, subject to the following conditions:
|
*
|
* The above copyright notice and this permission notice (including the next
|
* paragraph) shall be included in all copies or substantial portions of the
|
* Software.
|
*
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
*/
|
#include "util/u_inlines.h"
|
#include "util/u_math.h"
|
#include "util/u_memory.h"
|
#include "util/u_pstipple.h"
|
#include "pipe/p_shader_tokens.h"
|
#include "draw/draw_context.h"
|
#include "draw/draw_vertex.h"
|
#include "sp_context.h"
|
#include "sp_screen.h"
|
#include "sp_state.h"
|
#include "sp_texture.h"
|
#include "sp_tex_sample.h"
|
#include "sp_tex_tile_cache.h"
|
#include "tgsi/tgsi_parse.h"
|
|
static void
|
cs_prepare(const struct sp_compute_shader *cs,
|
struct tgsi_exec_machine *machine,
|
int w, int h, int d,
|
int g_w, int g_h, int g_d,
|
int b_w, int b_h, int b_d,
|
struct tgsi_sampler *sampler,
|
struct tgsi_image *image,
|
struct tgsi_buffer *buffer )
|
{
|
int j;
|
/*
|
* Bind tokens/shader to the interpreter's machine state.
|
*/
|
tgsi_exec_machine_bind_shader(machine,
|
cs->tokens,
|
sampler, image, buffer);
|
|
if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
|
unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
|
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
machine->SystemValue[i].xyzw[0].i[j] = w;
|
machine->SystemValue[i].xyzw[1].i[j] = h;
|
machine->SystemValue[i].xyzw[2].i[j] = d;
|
}
|
}
|
|
if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
|
unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
|
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
machine->SystemValue[i].xyzw[0].i[j] = g_w;
|
machine->SystemValue[i].xyzw[1].i[j] = g_h;
|
machine->SystemValue[i].xyzw[2].i[j] = g_d;
|
}
|
}
|
|
if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
|
unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
|
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
machine->SystemValue[i].xyzw[0].i[j] = b_w;
|
machine->SystemValue[i].xyzw[1].i[j] = b_h;
|
machine->SystemValue[i].xyzw[2].i[j] = b_d;
|
}
|
}
|
}
|
|
static bool
|
cs_run(const struct sp_compute_shader *cs,
|
int g_w, int g_h, int g_d,
|
struct tgsi_exec_machine *machine, bool restart)
|
{
|
if (!restart) {
|
if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
|
unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
|
int j;
|
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
machine->SystemValue[i].xyzw[0].i[j] = g_w;
|
machine->SystemValue[i].xyzw[1].i[j] = g_h;
|
machine->SystemValue[i].xyzw[2].i[j] = g_d;
|
}
|
}
|
machine->NonHelperMask = (1 << 1) - 1;
|
}
|
|
tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
|
|
if (machine->pc != -1)
|
return true;
|
return false;
|
}
|
|
/*
 * Execute every thread of one work group, identified by block id
 * (g_w, g_h, g_d).
 *
 * Each pass runs all num_threads machines in order.  The first pass starts
 * them from scratch.  If any machine reports that it still has work
 * pending (cs_run() returned true), another pass is made in which every
 * machine is resumed.  This repeats until a pass completes with no machine
 * pending.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;

   for (;;) {
      bool any_pending = false;
      int t;

      for (t = 0; t < num_threads; t++) {
         /* Always run every thread; only the result is accumulated. */
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
|
|
static void
|
cs_delete(const struct sp_compute_shader *cs,
|
struct tgsi_exec_machine *machine)
|
{
|
if (machine->Tokens == cs->tokens) {
|
tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
|
}
|
}
|
|
static void
|
fill_grid_size(struct pipe_context *context,
|
const struct pipe_grid_info *info,
|
uint32_t grid_size[3])
|
{
|
struct pipe_transfer *transfer;
|
uint32_t *params;
|
if (!info->indirect) {
|
grid_size[0] = info->grid[0];
|
grid_size[1] = info->grid[1];
|
grid_size[2] = info->grid[2];
|
return;
|
}
|
params = pipe_buffer_map_range(context, info->indirect,
|
info->indirect_offset,
|
3 * sizeof(uint32_t),
|
PIPE_TRANSFER_READ,
|
&transfer);
|
|
if (!transfer)
|
return;
|
|
grid_size[0] = params[0];
|
grid_size[1] = params[1];
|
grid_size[2] = params[2];
|
pipe_buffer_unmap(context, transfer);
|
}
|
|
void
|
softpipe_launch_grid(struct pipe_context *context,
|
const struct pipe_grid_info *info)
|
{
|
struct softpipe_context *softpipe = softpipe_context(context);
|
struct sp_compute_shader *cs = softpipe->cs;
|
int num_threads_in_group;
|
struct tgsi_exec_machine **machines;
|
int bwidth, bheight, bdepth;
|
int w, h, d, i;
|
int g_w, g_h, g_d;
|
uint32_t grid_size[3] = {0};
|
void *local_mem = NULL;
|
|
softpipe_update_compute_samplers(softpipe);
|
bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
|
bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
|
bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
|
num_threads_in_group = bwidth * bheight * bdepth;
|
|
fill_grid_size(context, info, grid_size);
|
|
if (cs->shader.req_local_mem) {
|
local_mem = CALLOC(1, cs->shader.req_local_mem);
|
}
|
|
machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
|
if (!machines) {
|
FREE(local_mem);
|
return;
|
}
|
|
/* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
|
for (d = 0; d < bdepth; d++) {
|
for (h = 0; h < bheight; h++) {
|
for (w = 0; w < bwidth; w++) {
|
int idx = w + (h * bwidth) + (d * bheight * bwidth);
|
machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
|
|
machines[idx]->LocalMem = local_mem;
|
machines[idx]->LocalMemSize = cs->shader.req_local_mem;
|
cs_prepare(cs, machines[idx],
|
w, h, d,
|
grid_size[0], grid_size[1], grid_size[2],
|
bwidth, bheight, bdepth,
|
(struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
|
(struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
|
(struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
|
tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
|
softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
|
softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
|
}
|
}
|
}
|
|
for (g_d = 0; g_d < grid_size[2]; g_d++) {
|
for (g_h = 0; g_h < grid_size[1]; g_h++) {
|
for (g_w = 0; g_w < grid_size[0]; g_w++) {
|
run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
|
}
|
}
|
}
|
|
for (i = 0; i < num_threads_in_group; i++) {
|
cs_delete(cs, machines[i]);
|
tgsi_exec_machine_destroy(machines[i]);
|
}
|
|
FREE(local_mem);
|
FREE(machines);
|
}
|