/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0

/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
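
/*
 * Illustrative only: at runtime the code emitted above prints a line that
 * begins with the formatted prefix, e.g. " TEMP[2].x = ", followed by one
 * value per SoA lane (the exact value formatting is up to
 * lp_build_print_value()).
 */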

/*
 * Return the context for the current function.
 * (always 'main', if the shader doesn't make any function calls)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns TRUE if we're in a loop.
 * The check is global: it returns TRUE even if there's no loop inside the
 * current function, as long as we were inside a loop in a function from
 * which this one was called.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns TRUE if we're inside a switch statement.
 * The check is global: it returns TRUE even if there's no switch in the
 * current function, as long as we were inside a switch in a function from
 * which this one was called.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns TRUE if we're inside a conditional.
 * The check is global: it returns TRUE even if there's no conditional in
 * the current function, as long as we were inside a conditional in a
 * function from which this one was called.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
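
/*
 * Illustrative per-lane view of the combination above (not literal code):
 *
 *    exec_mask = cond_mask & cont_mask & break_mask & switch_mask & ret_mask
 *
 * where each term is only AND'ed in when the corresponding construct is
 * active. A lane whose bit is 0 in exec_mask has all its stores suppressed.
 */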

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
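
/*
 * Example (illustrative): a TGSI IF/ELSE/ENDIF sequence maps onto the three
 * helpers above as
 *
 *    IF cond  ->  lp_exec_mask_cond_push(mask, cond);
 *    ELSE     ->  lp_exec_mask_cond_invert(mask);
 *    ENDIF    ->  lp_exec_mask_cond_pop(mask);
 *
 * so divergent lanes are handled purely by masking; no branches are emitted
 * for the conditional itself.
 */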

static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional
          * break. (The condition here is not perfect, since dead code after
          * a break is allowed, but it should be sufficient: false negatives
          * merely leave the code unoptimized, so we don't have to
          * pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
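
/*
 * Sketch of the control flow emitted for a TGSI loop (illustrative):
 *
 *    bgnloop:                       ; lp_exec_bgnloop() branches here
 *       ...loop body, fully masked...
 *       icond = (exec_mask != 0) && (loop_limiter > 0)
 *       br icond, bgnloop, endloop  ; the only real branch in the loop
 *    endloop:
 *
 * The back edge is taken as long as any lane is still live and the
 * LP_MAX_TGSI_LOOP_ITERATIONS safety limit isn't exhausted.
 */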

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* If there's a deferred default, execute it now. */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point here again, since we stop execution
       * of the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* Skipping the case mask evaluation here is NOT optional (at least not
    * in all cases). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
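
/*
 * Illustrative per-lane view of the CASE handling above: a lane executes
 * the case body iff
 *
 *    (caseval == switch_val || it already fell through a previous case)
 *    && the mask outside the switch
 *
 * and every matching lane is also accumulated into switch_mask_default, so
 * DEFAULT later runs only on the lanes no case matched.
 */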

/*
 * Analyse default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that), everything
    * is just fine: update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is
    * last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough; however, we still have to count it as such, as we
       * have already updated the masks.
       * If that happens in practice, we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into
       * it, we record the PC and continue execution at the next case
       * (again, cases encountered at the same time don't count). At
       * endswitch time, we update the switch mask and go back to execute
       * the code we skipped, until the next break (possibly re-executing
       * some code with a changed mask, if there was a fallthrough out of
       * default).
       * Finally, if it is not the last statement and there was a
       * fallthrough into it, do the same as in the former case, except
       * instead of skipping the code just execute it without updating the
       * mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}

/* Stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   if (exec_mask) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
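
/*
 * Per lane, the masked store above is equivalent to (illustrative):
 *
 *    *dst_ptr = exec_mask ? val : *dst_ptr;
 *
 * i.e. a full-width load, a lp_build_select(), and a full-width store;
 * no scalar control flow is generated.
 */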

static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
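
/*
 * Example (illustrative): subroutine calls are handled by steering the TGSI
 * program counter rather than by emitting real calls. A CAL pushes the
 * return pc and ret_mask and sets *pc to the subroutine start; RET inside
 * the subroutine only clears the returning lanes in ret_mask; ENDSUB pops
 * the stack, restoring pc and ret_mask for the caller.
 */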

static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}

/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller, to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to LLVM going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
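
/*
 * Sketch of what the loop above emits (illustrative): per result element i,
 *
 *    res[i] = base_ptr[index[i]];
 *
 * with overflowing lanes redirected to index 0 and then zeroed by the final
 * select, matching the D3D10 constant-buffer out-of-bounds rule.
 */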

/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}

/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* the swizzle field selects which component of the indirect register
    * is used (typically the X component of an address register) */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
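
/*
 * Example (illustrative): for a source like TEMP[ADDR[0].x + 5] this
 * returns the per-lane vector ADDR[0].x + 5, clamped to
 * file_max[TGSI_FILE_TEMPORARY], so each lane may end up addressing a
 * different temporary register.
 */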

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
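
/*
 * Worked example (illustrative): with a 4-wide vector, fetching channel .z
 * (chan_index = 2) of register 1 (indirect_index = 1, uniform across lanes)
 * gives
 *
 *    index_vec = (1 * 4 + 2) * 4 + {0,1,2,3} = {24, 25, 26, 27}
 *
 * i.e. the four per-lane floats of that channel in the flat SoA array.
 */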

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
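
/*
 * Illustrative shuffle for a 4-wide vector: with input = {x0,x1,x2,x3}
 * holding the low halves and input2 = {y0,y1,y2,y3} the high halves, the
 * mask {0,4,1,5,2,6,3,7} interleaves them into {x0,y0,x1,y1,x2,y2,x3,y3},
 * which then bitcasts to four 64-bit values.
 */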

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle + 1,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype;  /* actual type of the value */

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}

/**
 * Store an array of vec_length 64-bit values into two arrays of vec_length
 * floats, i.e.
 * value is d0, d1, d2, d3 etc.
 * each 64-bit value has high and low pieces x, y
 * so they get stored into the separate channels as:
 * chan_ptr  = d0.x, d1.x, d2.x, d3.x
 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];

   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}
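
/*
 * Illustrative inverse of emit_fetch_64bit(): for a 4-wide vector, value
 * viewed as 8 floats {x0,y0,x1,y1,x2,y2,x3,y3} is de-interleaved by the
 * shuffle masks {0,2,4,6} and {1,3,5,7} into temp = {x0,x1,x2,x3} and
 * temp2 = {y0,y1,y2,y3} before the two masked stores.
 */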

/**
 * Register store.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently mesa/st doesn't generate indirect stores
       * to 64-bit values; it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   (void)dtype;
}
|
|
/*
 * Called at the beginning of the translation of each TGSI instruction, to
 * emit some debug code.
 */
static void
emit_debug(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (DEBUG_EXECUTION) {
      /*
       * Dump the TGSI instruction.
       */

      struct gallivm_state *gallivm = bld_base->base.gallivm;
      char buf[512];
      buf[0] = '$';
      buf[1] = ' ';
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
      lp_build_printf(gallivm, buf);

      /* Dump the execution mask. */
      if (bld->exec_mask.has_mask) {
         lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
      }
   }
}

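/*
 * Store an instruction's results into its destination register(s),
 * honoring the write mask. Note that for 64-bit destination types a value
 * spans two channels (x+y or z+w), so only the even channel of each pair
 * triggers a store (e.g. a double-precision MOV writing .xy results in a
 * single store, at channel 0, covering both halves).
 */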
static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   unsigned index,
   LLVMValueRef dst[4])
{
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   unsigned writemask = inst->Dst[index].Register.WriteMask;
   while (writemask) {
      unsigned chan_index = u_bit_scan(&writemask);
      /* 64-bit values occupy two channels; the high halves (y, w) are
       * written together with their low halves (x, z). */
      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
         continue;
      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
   }
}

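/*
 * Translate a TGSI texture target into the corresponding pipe texture
 * target (shadow variants collapse onto their base target).
 */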
static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      return PIPE_BUFFER;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      return PIPE_TEXTURE_1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      return PIPE_TEXTURE_2D;
   case TGSI_TEXTURE_3D:
      return PIPE_TEXTURE_3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      return PIPE_TEXTURE_CUBE;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      return PIPE_TEXTURE_RECT;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return PIPE_TEXTURE_1D_ARRAY;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return PIPE_TEXTURE_2D_ARRAY;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return PIPE_TEXTURE_CUBE_ARRAY;
   default:
      assert(0);
      return PIPE_BUFFER;
   }
}


static enum lp_sampler_lod_property
lp_build_lod_property(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   enum lp_sampler_lod_property lod_property;

   /*
    * Not much we can do here. We could try catching inputs declared
    * with constant interpolation, but it's not clear it's worth it - since
    * for TEX opcodes as well as FETCH/LD the lod comes from the same reg
    * as the coords, it could only work for SAMPLE/TXQ/SVIEWINFO, just
    * like the constant/immediate recognition below.
    * What would be of more value is to recognize temps holding
    * broadcasted scalars, but there's no way we can do it.
    * Tried asking llvm but without any success (using LLVMIsConstant,
    * even though this isn't exactly what we'd need); even something as
    * simple as
    *   IMM[0] UINT32 (0,-1,0,0)
    *   MOV TEMP[0] IMM[0].yyyy
    *   SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
    * doesn't work.
    * This means there's ZERO chance this will ever catch a scalar lod
    * with traditional tex opcodes as well as texel fetches, since the lod
    * comes from the same reg as the coords (except for some test shaders
    * using constant coords maybe).
    * There's at least hope for sample opcodes as well as size queries.
    */
   if (reg->Register.File == TGSI_FILE_CONSTANT ||
       reg->Register.File == TGSI_FILE_IMMEDIATE) {
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }
   else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
      if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
      }
   }
   else {
      /* never use scalar (per-quad) lod; the results are just too wrong. */
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }
   return lod_property;
}


/**
 * High-level instruction translators.
 */

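/*
 * Emit code to sample a texture for the classic TEX-style opcodes.
 * coords[] holds up to 3 coordinates as required by the target, with the
 * layer index in the 3rd slot (4th for cube arrays) and the shadow
 * comparison value always in the 5th slot; unused slots are left undef.
 */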
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4;   /* shadow coord comes from a separate reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord always occupies the 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}

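/*
 * Emit code for the DX10-style SAMPLE* opcodes. Unlike the TEX-style
 * opcodes above, the texture and sampler units are specified separately
 * (src1 and src2 respectively), and the resource target comes from the
 * declared sampler view rather than from the instruction itself.
 */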
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Unlike old-style tex opcodes, the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord always occupies the 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}

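/*
 * Emit code for texel fetches (TXF and SAMPLE_I). Presumably no filtering
 * takes place for a fetch, which is why the sampler unit is ignored below;
 * the lod, where present, is an explicit mipmap level taken from src0.w.
 */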
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * The sampler is not actually used; set it to 0 so it won't exceed
    * PIPE_MAX_SAMPLERS and trigger some assertions with d3d10, where the
    * sampler view number can exceed this limit.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}

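/*
 * Emit code for texture size queries (TXQ and SVIEWINFO). For SVIEWINFO
 * the target comes from the declared sampler view; for TXQ it comes from
 * the instruction itself.
 */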
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }

   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}

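/*
 * Check if we are near the end of the shader: returns TRUE if no more
 * than a handful of instructions remain and none of them is a texture
 * lookup or a control-flow change. Used after KILL to decide whether an
 * early-exit mask check is still worthwhile.
 */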
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   unsigned i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= bld->bld_base.info->num_instructions)
         return TRUE;

      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_END)
         return TRUE;

      if (opcode == TGSI_OPCODE_TEX ||
          opcode == TGSI_OPCODE_TXP ||
          opcode == TGSI_OPCODE_TXD ||
          opcode == TGSI_OPCODE_TXB ||
          opcode == TGSI_OPCODE_TXL ||
          opcode == TGSI_OPCODE_TXF ||
          opcode == TGSI_OPCODE_TXQ ||
          opcode == TGSI_OPCODE_TEX2 ||
          opcode == TGSI_OPCODE_TXB2 ||
          opcode == TGSI_OPCODE_TXL2 ||
          opcode == TGSI_OPCODE_SAMPLE ||
          opcode == TGSI_OPCODE_SAMPLE_B ||
          opcode == TGSI_OPCODE_SAMPLE_C ||
          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
          opcode == TGSI_OPCODE_SAMPLE_D ||
          opcode == TGSI_OPCODE_SAMPLE_I ||
          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
          opcode == TGSI_OPCODE_SAMPLE_L ||
          opcode == TGSI_OPCODE_SVIEWINFO ||
          opcode == TGSI_OPCODE_CAL ||
          opcode == TGSI_OPCODE_IF ||
          opcode == TGSI_OPCODE_UIF ||
          opcode == TGSI_OPCODE_BGNLOOP ||
          opcode == TGSI_OPCODE_SWITCH)
         return FALSE;
   }

   return TRUE;
}

/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Only fetch the component if it has not been tested already. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Unconditional fragment kill.
 * The only predication is the execution mask, which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kill(struct lp_build_tgsi_soa_context *bld,
          int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Emit code which will dump the values of the given register file
 * to stdout.
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0) {
         /* This was not declared. */
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used. */
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}

void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
      {
         /*
          * We could trivially fetch the per-buffer pointer when fetching the
          * constant, relying on llvm to figure out it's always the same pointer
          * anyway. However, doing so results in a huge (more than factor of 10)
          * slowdown in llvm compilation times for some (but not all) shaders
          * (more specifically, the IR optimization spends way more time in
          * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
          */
         unsigned idx2D = decl->Dim.Index2D;
         LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
         assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
         bld->consts[idx2D] =
            lp_build_array_get(gallivm, bld->consts_ptr, index2D);
         bld->consts_sizes[idx2D] =
            lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
      }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}

void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMValueRef imms[4];
   unsigned i;
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         imms[i] =
            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
      break;
   case TGSI_IMM_FLOAT64:
   case TGSI_IMM_UINT64:
   case TGSI_IMM_INT64:
   case TGSI_IMM_UINT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }
      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }
      break;
   }
   for( i = size; i < 4; ++i )
      imms[i] = bld_base->base.undef;

   if (bld->use_immediates_array) {
      unsigned index = bld->num_immediates;
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = gallivm->builder;

      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
      for (i = 0; i < 4; ++i ) {
         LLVMValueRef lindex = lp_build_const_int32(
            bld->bld_base.base.gallivm, index * 4 + i);
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                             bld->imms_array, &lindex, 1, "");
         LLVMBuildStore(builder, imms[i], imm_ptr);
      }
   } else {
      /* simply copy the immediate values into the next immediates[] slot */
      unsigned i;
      assert(imm->Immediate.NrTokens - 1 <= 4);
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);

      for(i = 0; i < 4; ++i )
         bld->immediates[bld->num_immediates][i] = imms[i];

      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
         unsigned index = bld->num_immediates;
         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
         LLVMBuilderRef builder = gallivm->builder;
         for (i = 0; i < 4; ++i ) {
            LLVMValueRef lindex = lp_build_const_int32(
               bld->bld_base.base.gallivm, index * 4 + i);
            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
            LLVMBuildStore(builder,
                           bld->immediates[index][i],
                           imm_ptr);
         }
      }
   }

   bld->num_immediates++;
}

static void
ddx_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL,
                    &emit_data->output[emit_data->chan], NULL);
}


static void
ddy_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
                    &emit_data->output[emit_data->chan]);
}


static void
kill_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill(bld, bld_base->pc - 1);
}


static void
kill_if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
}


static void
tex_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}


static void
tex2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}


static void
txb_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}


static void
txb2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}


static void
txd_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
}


static void
txl_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}


static void
txl2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}


static void
txp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}


static void
tg4_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
}


static void
lodq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_LODQ);
}


static void
txq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
}


static void
txf_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
}


static void
sample_i_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
}


static void
sample_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
sample_b_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
sample_c_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
sample_c_lz_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
sample_d_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
sample_l_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}


static void
gather4_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
}


static void
sviewinfo_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}


static void
lod_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
}

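/*
 * Return the effective execution mask: the fragment mask combined with
 * the current control-flow (exec) mask, if any.
 */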
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec_mask = &bld->exec_mask;

   if (!exec_mask->has_mask) {
      return lp_build_mask_value(bld->mask);
   }
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
                       exec_mask->exec_mask, "");
}

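/*
 * Increment the active lanes of a uint vector counter in memory by one.
 * Active lanes of the mask are all ones, i.e. -1 as a signed integer, so
 * subtracting the mask adds exactly 1 to each active lane. For example,
 * a counter of {3,3,3,3} with a mask of {~0,0,~0,0} becomes {4,3,4,3}.
 */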
static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                          LLVMValueRef ptr,
                          LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = LLVMBuildSub(builder, current_vec, mask, "");

   LLVMBuildStore(builder, current_vec, ptr);
}


static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = lp_build_select(&bld_base->uint_bld,
                                 mask,
                                 bld_base->uint_bld.zero,
                                 current_vec);

   LLVMBuildStore(builder, current_vec, ptr);
}


static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                        total_emitted_vertices_vec,
                                        bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
}

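/*
 * EMIT: append a vertex to the geometry shader output stream, for the
 * lanes that are active and have not yet exceeded the declared maximum
 * number of output vertices.
 */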
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}

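/*
 * ENDPRIM: finish the current output primitive for the lanes selected by
 * the given mask, provided they actually have unflushed vertices.
 */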
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
       * telling us which, if any, execution slots actually have
       * unemitted primitives; this way we make sure that end_primitive
       * executes only on the paths that have unflushed vertices. */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}


static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}


static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}


static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}


static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}


static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}


static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}


static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}


static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}


static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}


static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}


static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}


static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}


static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}


static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}


static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}


static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}


static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

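/*
 * Shader prologue: allocate backing arrays for any register files that
 * are accessed indirectly, copy inputs into their array if needed, and
 * set up the geometry shader emit counters.
 */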
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}


static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache. Note we must not call end_primitive here
       * since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs, copy our alloca array
       * to the output slots specified by the caller. */
      gather_outputs(bld);
   }
}

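/**
 * Translate a TGSI shader into LLVM IR, operating on SoA vectors: one
 * value per channel, each holding type.length lanes. This is the main
 * entry point of this file.
 */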
void
|
lp_build_tgsi_soa(struct gallivm_state *gallivm,
|
const struct tgsi_token *tokens,
|
struct lp_type type,
|
struct lp_build_mask_context *mask,
|
LLVMValueRef consts_ptr,
|
LLVMValueRef const_sizes_ptr,
|
const struct lp_bld_tgsi_system_values *system_values,
|
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
|
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
|
LLVMValueRef context_ptr,
|
LLVMValueRef thread_data_ptr,
|
struct lp_build_sampler_soa *sampler,
|
const struct tgsi_shader_info *info,
|
const struct lp_build_tgsi_gs_iface *gs_iface)
|
{
|
struct lp_build_tgsi_soa_context bld;
|
|
struct lp_type res_type;
|
|
assert(type.length <= LP_MAX_VECTOR_LENGTH);
|
memset(&res_type, 0, sizeof res_type);
|
res_type.width = type.width;
|
res_type.length = type.length;
|
res_type.sign = 1;
|
|
/* Setup build context */
|
memset(&bld, 0, sizeof bld);
|
lp_build_context_init(&bld.bld_base.base, gallivm, type);
|
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
|
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
|
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
|
{
|
struct lp_type dbl_type;
|
dbl_type = type;
|
dbl_type.width *= 2;
|
lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
|
}
|
{
|
struct lp_type uint64_type;
|
uint64_type = lp_uint_type(type);
|
uint64_type.width *= 2;
|
lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
|
}
|
{
|
struct lp_type int64_type;
|
int64_type = lp_int_type(type);
|
int64_type.width *= 2;
|
lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
|
}
|
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons, immediates are normally backed by a static
    * array, but if there are too many of them we have to fall back to a
    * dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

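   /* Register the SoA fetch/store callbacks used by the common TGSI walker. */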
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions: start from the default CPU actions, then override
    * the control-flow, derivative and texture sampling opcodes below. */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;


   if (gs_iface) {
      /* This property should always be set, but apps using
       * ext_geometry_shader4 quite often forget it.  Rather than
       * asserting when it is missing, which would be a lot uglier,
       * we fall back to MAX_VERTEX_VARYING from that spec. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

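   /* The execution mask tracks which lanes remain active through the
    * shader's control flow. */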
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

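   /* Walk the token stream and emit LLVM IR for each instruction. */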
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      /* Debug aid: dump the TGSI source and the generated LLVM function,
       * bracketed by marker lines that are easy to grep for. */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      /* Debug aid: dump the entire LLVM module. */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }

   lp_exec_mask_fini(&bld.exec_mask);
}