/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {
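
// rv6xx can't reliably read a GPR through relative (AR) addressing in the
// group immediately following the group that wrote it, and likewise right
// after a relatively-addressed write; finalize_alu_src() detects these
// hazards and this helper separates the two groups with a NOP group.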
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx:
	// add an ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}
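
// Lowers a loop region to hardware CF. The emitted sequence is,
// schematically:
//
//   LOOP_START_DX10  (jump target: after LOOP_END)
//     ... loop body ...
//     LOOP_BREAK     -> LOOP_END   (one per depart)
//     LOOP_CONTINUE  -> LOOP_END   (one per repeat; see the FIXME below
//                                   about redundant ones)
//   LOOP_END         (jump target: after LOOP_START_DX10)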
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current
	// loop nest, since it may point to a cf that is later in program order.
	// The single parent-level check is sufficient because finalize_loop()
	// processes loops in reverse order, from the innermost to the outermost
	// nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be a depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for an outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		/* depart/repeat 1 is actually part of the "else" code:
		 * if it's a depart for an outer loop region, it will want to
		 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
		 * to emit the else clause.
		 */
		bool has_else = n_if->next;

		if (repdep1->is_depart()) {
			depart_node *dep1 = static_cast<depart_node*>(repdep1);
			if (dep1->target != r && dep1->target->is_loop())
				has_else = true;
		}

		if (repdep1->is_repeat()) {
			repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
			if (rep1->target != r && rep1->target->is_loop())
				has_else = true;
		}

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
				last_cf = c;
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}
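
// Finalize one VLIW group: resolve destination values to their final
// GPR/channel (a destination's channel must match the instruction's slot,
// except in the trans slot), set write and predicate masks, and mark the
// last instruction of the group. Source finalization may request a NOP
// group in front of this one; it is only materialized when the rv6xx
// workaround is enabled.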
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() ||
					d->is_lds_oq() || d->is_lds_access());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}
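
// Translates the source operands of one instruction to machine encoding.
// Returns true if a NOP group is needed before 'g' because of an AR hazard:
// either this group reads a GPR through relative addressing that the
// previous group wrote, or it reads a GPR that the previous group wrote
// through relative addressing.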
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a,
		alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end();
							pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_SPECIAL_REG:
			if (v->select.sel() == SV_LDS_OQA) {
				src.sel = ALU_SRC_LDS_OQ_A_POP;
				src.chan = 0;
			} else if (v->select.sel() == SV_LDS_OQB) {
				src.sel = ALU_SRC_LDS_OQ_B_POP;
				src.chan = 0;
			} else {
				src.sel = ALU_SRC_0;
				src.chan = 0;
			}
			break;
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end();
					pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}
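
// Copies four consecutive source operands of 'src', starting at 'arg_start',
// into the source slots of 'dst': undefined values become SEL_MASK, the
// constants 0 and 1.0f become the inline selects SEL_0/SEL_1, and GPR
// operands become channel selects; all GPR operands must live in a single
// register, which becomes src_gpr.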
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}
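
// For gradient-sampling instructions the gradients are carried as extra
// source operands: slots 4..7 hold the vertical gradient and slots 8..11
// the horizontal one. Emit FETCH_OP_SET_GRADIENTS_V/_H in front of the
// sample instruction to load them.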
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}
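
// Texture offsets are carried in source slots 4..7; emit
// FETCH_OP_SET_TEXTURE_OFFSETS in front of the sample instruction to load
// them.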
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}
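
// The number of source operands to translate depends on the fetch type:
// one for vertex fetches, two for GDS operations, and a full 4-component
// coordinate for texture fetches.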
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_GDS) {
		src_count = 2;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	if ((flags & FF_GDS) && reg == -1) {
		f->bc.dst_sel[0] = SEL_MASK;
		f->bc.dst_gpr = 0;
		return;
	}
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}
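
// Maps a kcache constant to its final source select using the clause's
// kcache lock slots; the four slots are addressed at kc_base[] =
// {128, 160, 256, 288}, with 16 constants per line. For example, assuming
// constant 35 of bank 0 and slot 1 holding {bank 0, addr 2, KC_LOCK_2}:
// line = 35 >> 4 = 2 matches addr, so sel = 160 + (35 - (2 << 4)) = 163.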
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}
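
// Counts the stack elements needed at node 'n', walking parent regions:
// each enclosing loop costs ctx.stack_entry_size elements and each 'if'
// costs one, plus a per-chip reservation. As a sketch, an 'if' nested in
// one loop on Evergreen, assuming stack_entry_size == 1, gives 1 + 1 plus
// the 1-element non-WQM-push reservation, i.e. 3 elements.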
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
		unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE: it was found that the conditions above are not sufficient;
		// there are other cases where we also need to reserve stack space,
		// so we always reserve 1 stack element if we have a non-WQM push on
		// the stack. Condition 2 is ignored for now because we don't use
		// that instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}
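
// Converts stack elements to stack entries for the hardware setup; e.g.
// 5 elements give (5 + 3) >> 2 == 2 entries.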
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as the entry
	// size, not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}
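
// CF-level peephole pass. The 8xx/9xx stack workaround splits the implicit
// push out of an affected clause, roughly:
//
//   ALU_PUSH_BEFORE ...   ==>   PUSH (jump -> alu); ALU ...
//
// The second loop then resolves jump-after targets, folds a POP that
// follows an ALU clause into ALU_POP_AFTER, and removes a JUMP whose
// target is the next CF instruction.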
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			if (c->jump_target->next == NULL) {
				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
				if (last_cf == c->jump_target)
					last_cf = static_cast<cf_node*>(c->jump_target->next);
			}
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb