/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
|
|
// Set PPH_DEBUG to a non-zero value to enable this pass's debug dumps.
#define PPH_DEBUG 0

// PPH_DUMP(q) runs the statements `q` when PPH_DEBUG is non-zero and
// expands to nothing otherwise.
#if !PPH_DEBUG
#define PPH_DUMP(q)
#else
#define PPH_DUMP(q) do { q } while (0)
#endif
|
|
#include "sb_shader.h"
|
#include "sb_pass.h"
|
|
namespace r600_sb {
|
|
int peephole::run() {
|
|
run_on(sh.root);
|
|
return 0;
|
}
|
|
// Visits the children of container `c` front to back, recursing into nested
// containers, and applies the per-instruction rewrites:
//  - a GDS fetch with no destination value, or an LDS ALU op with no dst[0],
//    has its opcode rebased from the *_RET range onto the range starting at
//    FETCH_OP_GDS_ADD / LDS_OP2_LDS_ADD (presumably the non-returning
//    variants - confirm against the opcode tables);
//  - ALU ops flagged AF_PRED / AF_SET / AF_CMOV / AF_KILL are handed to
//    optimize_cc_op();
//  - a FLT_TO_INT whose input comes from a set-on-condition op is replaced
//    through convert_float_setcc().
void peephole::run_on(container_node* c) {

	for (node_iterator pos = c->begin(), stop = c->end(); pos != stop; ++pos) {
		node *cur = *pos;

		if (cur->is_container()) {
			run_on(static_cast<container_node*>(cur));
			continue;
		}

		if (cur->is_fetch_inst() && (cur->fetch_op_flags() & FF_GDS)) {
			fetch_node *fetch = static_cast<fetch_node*>(cur);

			// Does the fetch write at least one value?
			bool writes_value = false;
			for (vvec::iterator d = fetch->dst.begin(), de = fetch->dst.end();
					d != de; ++d) {
				if (*d) {
					writes_value = true;
					break;
				}
			}

			if (!writes_value &&
					fetch->bc.op >= FETCH_OP_GDS_ADD_RET &&
					fetch->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
				fetch->bc.set_op(fetch->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
		}

		if (!cur->is_alu_inst())
			continue;

		alu_node *alu = static_cast<alu_node*>(cur);
		const unsigned aflags = alu->bc.op_ptr->flags;

		if (aflags & AF_LDS) {
			// Only LDS ops whose result is unused are rewritten.
			if (alu->dst[0])
				continue;

			if (alu->bc.op >= LDS_OP2_LDS_ADD_RET && alu->bc.op <= LDS_OP3_LDS_MSKOR_RET)
				alu->bc.set_op(alu->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD);

			// Tested against the opcode as it stands after the rebase above.
			if (alu->bc.op == LDS_OP1_LDS_READ_RET)
				alu->src[0] = sh.get_undef_value();

		} else if (aflags & (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
			optimize_cc_op(alu);

		} else if (alu->bc.op == ALU_OP1_FLT_TO_INT) {
			// get_bool_flt_to_int_source() redirects `setcc` to the
			// producing set-on-condition op when it succeeds.
			alu_node *setcc = alu;

			// NOTE(review): convert_float_setcc() unlinks `alu`, which is
			// the node `pos` still refers to, after inserting a clone
			// right behind it.  The ++pos that follows assumes advancing
			// from an unlinked node is valid - confirm against
			// node::remove().
			if (get_bool_flt_to_int_source(setcc))
				convert_float_setcc(alu, setcc);
		}
	}
}
|
|
// Dispatches a conditional ALU op to the matching optimizer:
// AF_PRED / AF_SET / AF_KILL ops go to optimize_cc_op2(), AF_CMOV ops go to
// optimize_CNDcc_op().  Ops with none of these flags are left untouched.
void peephole::optimize_cc_op(alu_node* a) {
	const unsigned op_flags = a->bc.op_ptr->flags;

	if (op_flags & (AF_PRED | AF_SET | AF_KILL)) {
		optimize_cc_op2(a);
		return;
	}

	if (op_flags & AF_CMOV)
		optimize_CNDcc_op(a);
}
|
|
// Replaces the node `f2i` with a clone of the set-on-condition node `s`.
// The clone's opcode is shifted by (ALU_OP2_SETE_DX10 - ALU_OP2_SETE), it
// takes over f2i's destination value (and becomes that value's def), and it
// is linked in at f2i's position before f2i itself is removed.
// NOTE(review): the opcode shift presumably relies on the *_DX10 opcodes
// being laid out in the same order as the plain SETcc opcodes - confirm.
void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
	alu_node *replacement = sh.clone(s);

	replacement->bc.set_op(replacement->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));

	// Hand the conversion's result value over to the clone.
	replacement->dst[0] = f2i->dst[0];
	replacement->dst[0]->def = replacement;

	// Link the clone in first so that f2i's position is still known.
	f2i->insert_after(replacement);
	f2i->remove();
}
|
|
// Folds a PRED_SETcc / SETcc / KILLcc op `a` that tests a boolean against
// literal zero into a direct comparison.
//
// Only unpredicated ops with condition AF_CC_E or AF_CC_NE are considered.
// A zero constant in src[0] is first moved to src[1] (clearing both bytecode
// source descriptors).  If the remaining operand is produced by a
// set-on-condition op (see get_bool_op_info()), `a` takes over that op's
// condition - inverted for the "== 0" form - and its two operands.
void peephole::optimize_cc_op2(alu_node* a) {

	const unsigned aflags = a->bc.op_ptr->flags;
	unsigned cond = aflags & AF_CC_MASK;

	if (a->pred)
		return;
	if (cond != AF_CC_E && cond != AF_CC_NE)
		return;

	unsigned cmp_type = aflags & AF_CMP_TYPE_MASK;
	const unsigned dst_type = aflags & AF_DST_TYPE_MASK;

	// Normalize so that the zero constant, if any, ends up in src[1].
	bool zero_in_src1;

	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
		std::swap(a->src[0], a->src[1]);
		// clear modifiers
		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
		zero_in_src1 = true;
	} else {
		zero_in_src1 = a->src[1]->is_const() &&
				a->src[1]->literal_value == literal(0);
	}

	if (!zero_in_src1)
		return;

	value *tested = a->src[0];
	bool_op_info bop = {};

	PPH_DUMP(
		sblog << "cc_op2: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	if (!get_bool_op_info(tested, bop))
		return;

	// "bool == 0" is the negation of the boolean's own condition.
	if (cond == AF_CC_E)
		bop.invert = !bop.invert;

	bool swap_args = false;

	// From here on `cond` is the condition of the producing op.
	cond = bop.n->bc.op_ptr->flags & AF_CC_MASK;

	if (bop.invert)
		cond = invert_setcc_condition(cond, swap_args);

	if (bop.int_cvt) {
		assert(cmp_type != AF_FLOAT_CMP);
		cmp_type = AF_FLOAT_CMP;
	}

	PPH_DUMP(
		sblog << "boi node: ";
		dump::dump_op(bop.n);
		sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt;
		sblog <<"\n";
	);

	// Pick the replacement opcode for the kind of op `a` is.
	unsigned newop;

	if (aflags & AF_PRED) {
		newop = get_predsetcc_op(cond, cmp_type);
	} else if (aflags & AF_SET) {
		newop = get_setcc_op(cond, cmp_type, dst_type != AF_FLOAT_DST);
	} else if (aflags & AF_KILL) {
		newop = get_killcc_op(cond, cmp_type);
	} else {
		newop = ALU_OP0_NOP;
		assert(!"invalid op kind");
	}

	a->bc.set_op(newop);

	// Take over the producer's operands, exchanged if the condition
	// inversion asked for it.
	const unsigned first = swap_args ? 1 : 0;
	const unsigned second = 1 - first;

	a->src[0] = bop.n->src[first];
	a->src[1] = bop.n->src[second];
	a->bc.src[0] = bop.n->bc.src[first];
	a->bc.src[1] = bop.n->bc.src[second];
}
|
|
// Folds a conditional-move op `a` (condition AF_CC_E or AF_CC_NE) whose
// selector src[0] is produced by a set-on-condition op comparing a value
// against literal zero.  The selector is replaced by the compared value, the
// opcode becomes the CNDcc op for the producer's condition, and src[1] /
// src[2] are exchanged whenever the rewritten condition is the logical
// opposite of the original one.
//
// Nothing is changed when the producer has an output modifier, compares
// unsigned integers, carries an ABS modifier on the compared operand, or has
// no zero constant operand.
void peephole::optimize_CNDcc_op(alu_node* a) {
	const unsigned aflags = a->bc.op_ptr->flags;
	const unsigned acc = aflags & AF_CC_MASK;
	const unsigned cmp_type = aflags & AF_CMP_TYPE_MASK;

	if (acc != AF_CC_E && acc != AF_CC_NE)
		return;

	// The "== 0" form is handled as "!= 0" with the two results exchanged.
	bool swap = (acc == AF_CC_E);

	value *selector = a->src[0];
	bool_op_info bop = {};

	PPH_DUMP(
		sblog << "cndcc: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	if (!get_bool_op_info(selector, bop))
		return;

	alu_node *producer = bop.n;

	if (producer->bc.omod)
		return;

	PPH_DUMP(
		sblog << "cndcc def: ";
		dump::dump_op(producer);
		sblog << "\n";
	);

	const unsigned pflags = producer->bc.op_ptr->flags;
	unsigned pcc = pflags & AF_CC_MASK;
	const unsigned pcmp_type = pflags & AF_CMP_TYPE_MASK;
	const unsigned pdst_type = pflags & AF_DST_TYPE_MASK;

	// TODO we can handle some of these cases,
	// though probably this shouldn't happen
	if (cmp_type != AF_FLOAT_CMP && pdst_type == AF_FLOAT_DST)
		return;

	// Index of the producer operand that is NOT the zero constant.
	int nds;

	if (producer->src[0]->is_const() &&
			producer->src[0]->literal_value == literal(0)) {
		nds = 1;
	} else if (producer->src[1]->is_const() &&
			producer->src[1]->literal_value == literal(0)) {
		nds = 0;
	} else {
		return;
	}

	// can't propagate ABS modifier to CNDcc because it's OP3
	if (producer->bc.src[nds].abs)
		return;

	// TODO we can handle some cases for uint comparison
	if (pcmp_type == AF_UINT_CMP)
		return;

	if (pcc == AF_CC_NE) {
		pcc = AF_CC_E;
		swap = !swap;
	}

	// With the zero on the left-hand side, GT and GE are each replaced by
	// the other and the two results are exchanged.
	if (nds == 1) {
		if (pcc == AF_CC_GT) {
			pcc = AF_CC_GE;
			swap = !swap;
		} else if (pcc == AF_CC_GE) {
			pcc = AF_CC_GT;
			swap = !swap;
		}
	}

	a->src[0] = producer->src[nds];
	a->bc.src[0] = producer->bc.src[nds];

	if (swap) {
		std::swap(a->src[1], a->src[2]);
		std::swap(a->bc.src[1], a->bc.src[2]);
	}

	a->bc.set_op(get_cndcc_op(pcc, pcmp_type));
}
|
|
// Checks whether `a` is a FLT_TO_INT whose input is produced by a
// set-on-condition op (flag AF_SET), either directly or through a TRUNC.
//
// The FLT_TO_INT source must carry no neg/abs/rel modifier.  When a TRUNC
// sits in between, its source must have neg == 1 and no abs/rel modifier.
//
// On success `a` is redirected to the producing set-on-condition node and
// true is returned; otherwise `a` is left unchanged and false is returned.
bool peephole::get_bool_flt_to_int_source(alu_node* &a) {

	if (a->bc.op != ALU_OP1_FLT_TO_INT)
		return false;

	if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
		return false;

	value *input = a->src[0];
	if (!input || !input->def || !input->def->is_alu_inst())
		return false;

	alu_node *producer = static_cast<alu_node*>(input->def);

	if (producer->is_alu_op(ALU_OP1_TRUNC)) {
		// Look through the TRUNC at whatever feeds it.
		input = producer->src[0];
		if (!input || !input->def || !input->def->is_alu_inst())
			return false;

		const bool modifiers_ok = producer->bc.src[0].neg == 1 &&
				producer->bc.src[0].abs == 0 &&
				producer->bc.src[0].rel == 0;
		if (!modifiers_ok)
			return false;

		producer = static_cast<alu_node*>(input->def);
	}

	if (!(producer->bc.op_ptr->flags & AF_SET))
		return false;

	a = producer;
	return true;
}
|
|
// Describes how the value `b` is produced, if it comes from a
// set-on-condition op.
//
// Returns true and sets bop.n to the producing set-on-condition node when
// b's def is either such a node itself (flag AF_SET) or a FLT_TO_INT chain
// accepted by get_bool_flt_to_int_source().  bop.int_cvt is set to true for
// an AF_DX10 producer and for the FLT_TO_INT case; it is never cleared here.
// Returns false, leaving `bop` untouched, in every other case.
bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {

	node *def = b->def;

	if (!def || !def->is_alu_inst())
		return false;

	alu_node *producer = static_cast<alu_node*>(def);
	const unsigned pflags = producer->bc.op_ptr->flags;
	bool int_result;

	if (pflags & AF_SET) {
		int_result = (pflags & AF_DX10) != 0;
	} else if (get_bool_flt_to_int_source(producer)) {
		// `producer` now refers to the set-on-condition op behind the
		// conversion.
		int_result = true;
	} else {
		return false;
	}

	bop.n = producer;
	if (int_result)
		bop.int_cvt = true;

	return true;
}
|
|
} // namespace r600_sb
|