/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
|
|
#include "radeon_program_pair.h"
|
|
#include "radeon_compiler.h"
|
#include "radeon_compiler_util.h"
|
|
|
/**
|
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction
|
* and reverse the order of arguments for CMP.
|
*/
|
static void final_rewrite(struct rc_sub_instruction *inst)
|
{
|
struct rc_src_register tmp;
|
|
switch(inst->Opcode) {
|
case RC_OPCODE_ADD:
|
inst->SrcReg[2] = inst->SrcReg[1];
|
inst->SrcReg[1].File = RC_FILE_NONE;
|
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
inst->SrcReg[1].Negate = RC_MASK_NONE;
|
inst->Opcode = RC_OPCODE_MAD;
|
break;
|
case RC_OPCODE_CMP:
|
tmp = inst->SrcReg[2];
|
inst->SrcReg[2] = inst->SrcReg[0];
|
inst->SrcReg[0] = tmp;
|
break;
|
case RC_OPCODE_MOV:
|
/* AMD say we should use CMP.
|
* However, when we transform
|
* KIL -r0;
|
* into
|
* CMP tmp, -r0, -r0, 0;
|
* KIL tmp;
|
* we get incorrect behaviour on R500 when r0 == 0.0.
|
* It appears that the R500 KIL hardware treats -0.0 as less
|
* than zero.
|
*/
|
inst->SrcReg[1].File = RC_FILE_NONE;
|
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
inst->SrcReg[2].File = RC_FILE_NONE;
|
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
|
inst->Opcode = RC_OPCODE_MAD;
|
break;
|
case RC_OPCODE_MUL:
|
inst->SrcReg[2].File = RC_FILE_NONE;
|
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
|
inst->Opcode = RC_OPCODE_MAD;
|
break;
|
default:
|
/* nothing to do */
|
break;
|
}
|
}
|
|
|
/**
|
* Classify an instruction according to which ALUs etc. it needs
|
*/
|
static void classify_instruction(struct rc_sub_instruction * inst,
|
int * needrgb, int * needalpha, int * istranscendent)
|
{
|
*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
|
*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
|
*istranscendent = 0;
|
|
if (inst->WriteALUResult == RC_ALURESULT_X)
|
*needrgb = 1;
|
else if (inst->WriteALUResult == RC_ALURESULT_W)
|
*needalpha = 1;
|
|
switch(inst->Opcode) {
|
case RC_OPCODE_ADD:
|
case RC_OPCODE_CMP:
|
case RC_OPCODE_CND:
|
case RC_OPCODE_DDX:
|
case RC_OPCODE_DDY:
|
case RC_OPCODE_FRC:
|
case RC_OPCODE_MAD:
|
case RC_OPCODE_MAX:
|
case RC_OPCODE_MIN:
|
case RC_OPCODE_MOV:
|
case RC_OPCODE_MUL:
|
break;
|
case RC_OPCODE_COS:
|
case RC_OPCODE_EX2:
|
case RC_OPCODE_LG2:
|
case RC_OPCODE_RCP:
|
case RC_OPCODE_RSQ:
|
case RC_OPCODE_SIN:
|
*istranscendent = 1;
|
*needalpha = 1;
|
break;
|
case RC_OPCODE_DP4:
|
*needalpha = 1;
|
/* fall through */
|
case RC_OPCODE_DP3:
|
*needrgb = 1;
|
break;
|
default:
|
break;
|
}
|
}
|
|
static void src_uses(struct rc_src_register src, unsigned int * rgb,
|
unsigned int * alpha)
|
{
|
int j;
|
for(j = 0; j < 4; ++j) {
|
unsigned int swz = GET_SWZ(src.Swizzle, j);
|
if (swz < 3)
|
*rgb = 1;
|
else if (swz < 4)
|
*alpha = 1;
|
}
|
}
|
|
/**
|
* Fill the given ALU instruction's opcodes and source operands into the given pair,
|
* if possible.
|
*/
|
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
struct rc_pair_instruction * pair,
|
struct rc_sub_instruction * inst)
|
{
|
int needrgb, needalpha, istranscendent;
|
const struct rc_opcode_info * opcode;
|
int i;
|
|
memset(pair, 0, sizeof(struct rc_pair_instruction));
|
|
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
|
|
if (needrgb) {
|
if (istranscendent)
|
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
|
else
|
pair->RGB.Opcode = inst->Opcode;
|
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
pair->RGB.Saturate = 1;
|
}
|
if (needalpha) {
|
pair->Alpha.Opcode = inst->Opcode;
|
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
pair->Alpha.Saturate = 1;
|
}
|
|
opcode = rc_get_opcode_info(inst->Opcode);
|
|
/* Presubtract handling:
|
* We need to make sure that the values used by the presubtract
|
* operation end up in src0 or src1. */
|
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
|
/* rc_pair_alloc_source() will fill in data for
|
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
|
int j;
|
for(j = 0; j < 3; j++) {
|
int src_regs;
|
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
|
continue;
|
|
src_regs = rc_presubtract_src_reg_count(
|
inst->PreSub.Opcode);
|
for(i = 0; i < src_regs; i++) {
|
unsigned int rgb = 0;
|
unsigned int alpha = 0;
|
src_uses(inst->SrcReg[j], &rgb, &alpha);
|
if(rgb) {
|
pair->RGB.Src[i].File =
|
inst->PreSub.SrcReg[i].File;
|
pair->RGB.Src[i].Index =
|
inst->PreSub.SrcReg[i].Index;
|
pair->RGB.Src[i].Used = 1;
|
}
|
if(alpha) {
|
pair->Alpha.Src[i].File =
|
inst->PreSub.SrcReg[i].File;
|
pair->Alpha.Src[i].Index =
|
inst->PreSub.SrcReg[i].Index;
|
pair->Alpha.Src[i].Used = 1;
|
}
|
}
|
}
|
}
|
|
for(i = 0; i < opcode->NumSrcRegs; ++i) {
|
int source;
|
if (needrgb && !istranscendent) {
|
unsigned int srcrgb = 0;
|
unsigned int srcalpha = 0;
|
unsigned int srcmask = 0;
|
int j;
|
/* We don't care about the alpha channel here. We only
|
* want the part of the swizzle that writes to rgb,
|
* since we are creating an rgb instruction. */
|
for(j = 0; j < 3; ++j) {
|
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
|
|
if (swz < RC_SWIZZLE_W)
|
srcrgb = 1;
|
else if (swz == RC_SWIZZLE_W)
|
srcalpha = 1;
|
|
if (swz < RC_SWIZZLE_UNUSED)
|
srcmask |= 1 << j;
|
}
|
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
if (source < 0) {
|
rc_error(&c->Base, "Failed to translate "
|
"rgb instruction.\n");
|
return;
|
}
|
pair->RGB.Arg[i].Source = source;
|
pair->RGB.Arg[i].Swizzle =
|
rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
|
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
|
pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
|
}
|
if (needalpha) {
|
unsigned int srcrgb = 0;
|
unsigned int srcalpha = 0;
|
unsigned int swz;
|
if (istranscendent) {
|
swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
|
} else {
|
swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
|
}
|
|
if (swz < 3)
|
srcrgb = 1;
|
else if (swz < 4)
|
srcalpha = 1;
|
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
if (source < 0) {
|
rc_error(&c->Base, "Failed to translate "
|
"alpha instruction.\n");
|
return;
|
}
|
pair->Alpha.Arg[i].Source = source;
|
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
|
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
|
|
if (istranscendent) {
|
pair->Alpha.Arg[i].Negate =
|
!!(inst->SrcReg[i].Negate &
|
inst->DstReg.WriteMask);
|
} else {
|
pair->Alpha.Arg[i].Negate =
|
!!(inst->SrcReg[i].Negate & RC_MASK_W);
|
}
|
}
|
}
|
|
/* Destination handling */
|
if (inst->DstReg.File == RC_FILE_OUTPUT) {
|
if (inst->DstReg.Index == c->OutputDepth) {
|
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
|
} else {
|
for (i = 0; i < 4; i++) {
|
if (inst->DstReg.Index == c->OutputColor[i]) {
|
pair->RGB.Target = i;
|
pair->Alpha.Target = i;
|
pair->RGB.OutputWriteMask |=
|
inst->DstReg.WriteMask & RC_MASK_XYZ;
|
pair->Alpha.OutputWriteMask |=
|
GET_BIT(inst->DstReg.WriteMask, 3);
|
break;
|
}
|
}
|
}
|
} else {
|
if (needrgb) {
|
pair->RGB.DestIndex = inst->DstReg.Index;
|
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
|
}
|
|
if (needalpha) {
|
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
|
if (pair->Alpha.WriteMask) {
|
pair->Alpha.DestIndex = inst->DstReg.Index;
|
}
|
}
|
}
|
|
if (needrgb) {
|
pair->RGB.Omod = inst->Omod;
|
}
|
if (needalpha) {
|
pair->Alpha.Omod = inst->Omod;
|
}
|
|
if (inst->WriteALUResult) {
|
pair->WriteALUResult = inst->WriteALUResult;
|
pair->ALUResultCompare = inst->ALUResultCompare;
|
}
|
}
|
|
|
static void check_opcode_support(struct r300_fragment_program_compiler *c,
|
struct rc_sub_instruction *inst)
|
{
|
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
|
|
if (opcode->HasDstReg) {
|
if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
|
rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
|
return;
|
}
|
}
|
|
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
if (inst->SrcReg[i].RelAddr) {
|
rc_error(&c->Base, "Fragment program does not support relative addressing "
|
" of source operands.\n");
|
return;
|
}
|
}
|
}
|
|
|
/**
|
* Translate all ALU instructions into corresponding pair instructions,
|
* performing no other changes.
|
*/
|
void rc_pair_translate(struct radeon_compiler *cc, void *user)
|
{
|
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
|
|
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
|
inst != &c->Base.Program.Instructions;
|
inst = inst->Next) {
|
const struct rc_opcode_info * opcode;
|
struct rc_sub_instruction copy;
|
|
if (inst->Type != RC_INSTRUCTION_NORMAL)
|
continue;
|
|
opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
|
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
|
continue;
|
|
copy = inst->U.I;
|
|
check_opcode_support(c, ©);
|
|
final_rewrite(©);
|
inst->Type = RC_INSTRUCTION_PAIR;
|
set_pair_instruction(c, &inst->U.P, ©);
|
}
|
}
|