/*
|
* Copyright (C) 2009 Nicolai Haehnle.
|
*
|
* All Rights Reserved.
|
*
|
* Permission is hereby granted, free of charge, to any person obtaining
|
* a copy of this software and associated documentation files (the
|
* "Software"), to deal in the Software without restriction, including
|
* without limitation the rights to use, copy, modify, merge, publish,
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
* permit persons to whom the Software is furnished to do so, subject to
|
* the following conditions:
|
*
|
* The above copyright notice and this permission notice (including the
|
* next paragraph) shall be included in all copies or substantial
|
* portions of the Software.
|
*
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
*
|
*/
|
|
#include "radeon_dataflow.h"
|
|
#include "radeon_compiler.h"
|
|
|
struct updatemask_state {
|
unsigned char Output[RC_REGISTER_MAX_INDEX];
|
unsigned char Temporary[RC_REGISTER_MAX_INDEX];
|
unsigned char Address;
|
unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
|
};
|
|
/**
 * Per-instruction result of the backward analysis, stored in an array
 * indexed by instruction IP.
 */
struct instruction_state {
	/* Destination components found to be used; the final pass copies this
	 * into the instruction's DstReg.WriteMask. */
	unsigned char WriteMask : 4;

	/* Set when the instruction's ALU result write was found to be used. */
	unsigned char WriteALUResult : 1;

	/* For each source operand, the source channels whose register reads
	 * have already been recorded as used. */
	unsigned char SrcReg[3];
};
|
|
/**
 * Bookkeeping for one loop on the loop stack: the usage states captured
 * at each BRK instruction seen so far (see push_break()).
 */
struct loopinfo {
	/* Growable array of usage snapshots, one per BRK. */
	struct updatemask_state *Breaks;

	/* Number of valid entries in Breaks. */
	unsigned int BreakCount;

	/* Capacity of Breaks, maintained by memory_pool_array_reserve(). */
	unsigned int BreaksReserved;
};
|
|
struct branchinfo {
|
unsigned int HaveElse:1;
|
|
struct updatemask_state StoreEndif;
|
struct updatemask_state StoreElse;
|
};
|
|
struct deadcode_state {
|
struct radeon_compiler * C;
|
struct instruction_state * Instructions;
|
|
struct updatemask_state R;
|
|
struct branchinfo * BranchStack;
|
unsigned int BranchStackSize;
|
unsigned int BranchStackReserved;
|
|
struct loopinfo * LoopStack;
|
unsigned int LoopStackSize;
|
unsigned int LoopStackReserved;
|
};
|
|
|
static void or_updatemasks(
|
struct updatemask_state * dst,
|
struct updatemask_state * a,
|
struct updatemask_state * b)
|
{
|
for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
|
dst->Output[i] = a->Output[i] | b->Output[i];
|
dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
|
}
|
|
for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
|
dst->Special[i] = a->Special[i] | b->Special[i];
|
|
dst->Address = a->Address | b->Address;
|
}
|
|
static void push_break(struct deadcode_state *s)
|
{
|
struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
|
memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
|
loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
|
|
memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
|
}
|
|
static void push_loop(struct deadcode_state * s)
|
{
|
memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
|
s->LoopStackSize, s->LoopStackReserved, 1);
|
memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
|
}
|
|
static void push_branch(struct deadcode_state * s)
|
{
|
struct branchinfo * branch;
|
|
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
|
s->BranchStackSize, s->BranchStackReserved, 1);
|
|
branch = &s->BranchStack[s->BranchStackSize++];
|
branch->HaveElse = 0;
|
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
|
}
|
|
/**
 * Look up the usage mask that tracks a given register.
 *
 * \param s     dead-code pass state
 * \param file  register file of the register
 * \param index register index within the file (ignored for the address file)
 * \return pointer to the mask inside s->R, or 0 when the file is not
 *         tracked or the index is out of range (the latter is also
 *         reported through rc_error())
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return 0;
		}
		return (file == RC_FILE_OUTPUT) ? &s->R.Output[index]
		                                : &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return 0;
		}
		return &s->R.Special[index];

	default:
		/* Register files that are not tracked by this pass. */
		return 0;
	}
}
|
|
/**
 * Flag components of a register as used in the current state.
 *
 * Registers that get_used_ptr() does not track are silently ignored.
 *
 * \param s     dead-code pass state
 * \param file  register file
 * \param index register index
 * \param mask  component bits to OR into the register's usage mask
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * slot = get_used_ptr(s, file, index);

	if (!slot)
		return;

	*slot |= mask;
}
|
|
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
|
{
|
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
struct instruction_state * insts = &s->Instructions[inst->IP];
|
unsigned int usedmask = 0;
|
unsigned int srcmasks[3];
|
|
if (opcode->HasDstReg) {
|
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
|
if (pused) {
|
usedmask = *pused & inst->U.I.DstReg.WriteMask;
|
*pused &= ~usedmask;
|
}
|
}
|
|
insts->WriteMask |= usedmask;
|
|
if (inst->U.I.WriteALUResult) {
|
unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
|
if (pused && *pused) {
|
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
usedmask |= RC_MASK_X;
|
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
usedmask |= RC_MASK_W;
|
|
*pused = 0;
|
insts->WriteALUResult = 1;
|
}
|
}
|
|
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
|
|
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
|
unsigned int refmask = 0;
|
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
|
insts->SrcReg[src] |= newsrcmask;
|
|
for(unsigned int chan = 0; chan < 4; ++chan) {
|
if (GET_BIT(newsrcmask, chan))
|
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
|
}
|
|
/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
|
refmask &= RC_MASK_XYZW;
|
|
if (!refmask)
|
continue;
|
|
mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
|
|
if (inst->U.I.SrcReg[src].RelAddr)
|
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
|
}
|
}
|
|
static void mark_output_use(void * data, unsigned int index, unsigned int mask)
|
{
|
struct deadcode_state * s = data;
|
|
mark_used(s, RC_FILE_OUTPUT, index, mask);
|
}
|
|
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
|
{
|
struct deadcode_state s;
|
unsigned int nr_instructions;
|
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
|
unsigned int ip;
|
|
memset(&s, 0, sizeof(s));
|
s.C = c;
|
|
nr_instructions = rc_recompute_ips(c);
|
s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
|
memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
|
|
dce(c, &s, &mark_output_use);
|
|
for(struct rc_instruction * inst = c->Program.Instructions.Prev;
|
inst != &c->Program.Instructions;
|
inst = inst->Prev) {
|
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
|
switch(opcode->Opcode){
|
/* Mark all sources in the loop body as used before doing
|
* normal deadcode analysis. This is probably not optimal.
|
*/
|
case RC_OPCODE_ENDLOOP:
|
{
|
int endloops = 1;
|
struct rc_instruction *ptr;
|
for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
|
opcode = rc_get_opcode_info(ptr->U.I.Opcode);
|
if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
|
endloops--;
|
continue;
|
}
|
if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
|
endloops++;
|
continue;
|
}
|
if(opcode->HasDstReg){
|
int src = 0;
|
unsigned int srcmasks[3];
|
rc_compute_sources_for_writemask(ptr,
|
ptr->U.I.DstReg.WriteMask, srcmasks);
|
for(src=0; src < opcode->NumSrcRegs; src++){
|
mark_used(&s,
|
ptr->U.I.SrcReg[src].File,
|
ptr->U.I.SrcReg[src].Index,
|
srcmasks[src]);
|
}
|
}
|
}
|
push_loop(&s);
|
break;
|
}
|
case RC_OPCODE_BRK:
|
push_break(&s);
|
break;
|
case RC_OPCODE_BGNLOOP:
|
{
|
unsigned int i;
|
struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
|
for(i = 0; i < loop->BreakCount; i++) {
|
or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
|
}
|
break;
|
}
|
case RC_OPCODE_CONT:
|
break;
|
case RC_OPCODE_ENDIF:
|
push_branch(&s);
|
break;
|
default:
|
if (opcode->IsFlowControl && s.BranchStackSize) {
|
struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
|
if (opcode->Opcode == RC_OPCODE_IF) {
|
or_updatemasks(&s.R,
|
&s.R,
|
branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
|
|
s.BranchStackSize--;
|
} else if (opcode->Opcode == RC_OPCODE_ELSE) {
|
if (branch->HaveElse) {
|
rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
|
} else {
|
memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
|
memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
|
branch->HaveElse = 1;
|
}
|
} else {
|
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
|
}
|
}
|
}
|
|
update_instruction(&s, inst);
|
}
|
|
ip = 0;
|
for(struct rc_instruction * inst = c->Program.Instructions.Next;
|
inst != &c->Program.Instructions;
|
inst = inst->Next, ++ip) {
|
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
int dead = 1;
|
unsigned int srcmasks[3];
|
unsigned int usemask;
|
|
if (!opcode->HasDstReg) {
|
dead = 0;
|
} else {
|
inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
|
if (s.Instructions[ip].WriteMask)
|
dead = 0;
|
|
if (s.Instructions[ip].WriteALUResult)
|
dead = 0;
|
else
|
inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
|
}
|
|
if (dead) {
|
struct rc_instruction * todelete = inst;
|
inst = inst->Prev;
|
rc_remove_instruction(todelete);
|
continue;
|
}
|
|
usemask = s.Instructions[ip].WriteMask;
|
|
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
usemask |= RC_MASK_X;
|
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
usemask |= RC_MASK_W;
|
|
rc_compute_sources_for_writemask(inst, usemask, srcmasks);
|
|
for(unsigned int src = 0; src < 3; ++src) {
|
for(unsigned int chan = 0; chan < 4; ++chan) {
|
if (!GET_BIT(srcmasks[src], chan))
|
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
|
}
|
}
|
}
|
|
rc_calculate_inputs_outputs(c);
|
}
|