/****************************************************************************
|
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
*
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
* copy of this software and associated documentation files (the "Software"),
|
* to deal in the Software without restriction, including without limitation
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
* and/or sell copies of the Software, and to permit persons to whom the
|
* Software is furnished to do so, subject to the following conditions:
|
*
|
* The above copyright notice and this permission notice (including the next
|
* paragraph) shall be included in all copies or substantial portions of the
|
* Software.
|
*
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
* IN THE SOFTWARE.
|
*
|
* @file state.h
|
*
|
* @brief Definitions for API state.
|
*
|
******************************************************************************/
|
#pragma once
|
|
#include "common/formats.h"
|
#include "common/intrin.h"
|
using gfxptr_t = unsigned long long;
|
#include <functional>
|
#include <algorithm>
|
|
//////////////////////////////////////////////////////////////////////////
|
/// PRIMITIVE_TOPOLOGY.
|
//////////////////////////////////////////////////////////////////////////
|
enum PRIMITIVE_TOPOLOGY
|
{
|
TOP_UNKNOWN = 0x0,
|
TOP_POINT_LIST = 0x1,
|
TOP_LINE_LIST = 0x2,
|
TOP_LINE_STRIP = 0x3,
|
TOP_TRIANGLE_LIST = 0x4,
|
TOP_TRIANGLE_STRIP = 0x5,
|
TOP_TRIANGLE_FAN = 0x6,
|
TOP_QUAD_LIST = 0x7,
|
TOP_QUAD_STRIP = 0x8,
|
TOP_LINE_LIST_ADJ = 0x9,
|
TOP_LISTSTRIP_ADJ = 0xA,
|
TOP_TRI_LIST_ADJ = 0xB,
|
TOP_TRI_STRIP_ADJ = 0xC,
|
TOP_TRI_STRIP_REVERSE = 0xD,
|
TOP_POLYGON = 0xE,
|
TOP_RECT_LIST = 0xF,
|
TOP_LINE_LOOP = 0x10,
|
TOP_POINT_LIST_BF = 0x11,
|
TOP_LINE_STRIP_CONT = 0x12,
|
TOP_LINE_STRIP_BF = 0x13,
|
TOP_LINE_STRIP_CONT_BF = 0x14,
|
TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
|
TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
|
|
TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
|
TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
|
TOP_PATCHLIST_2 = 0x21,
|
TOP_PATCHLIST_3 = 0x22,
|
TOP_PATCHLIST_4 = 0x23,
|
TOP_PATCHLIST_5 = 0x24,
|
TOP_PATCHLIST_6 = 0x25,
|
TOP_PATCHLIST_7 = 0x26,
|
TOP_PATCHLIST_8 = 0x27,
|
TOP_PATCHLIST_9 = 0x28,
|
TOP_PATCHLIST_10 = 0x29,
|
TOP_PATCHLIST_11 = 0x2A,
|
TOP_PATCHLIST_12 = 0x2B,
|
TOP_PATCHLIST_13 = 0x2C,
|
TOP_PATCHLIST_14 = 0x2D,
|
TOP_PATCHLIST_15 = 0x2E,
|
TOP_PATCHLIST_16 = 0x2F,
|
TOP_PATCHLIST_17 = 0x30,
|
TOP_PATCHLIST_18 = 0x31,
|
TOP_PATCHLIST_19 = 0x32,
|
TOP_PATCHLIST_20 = 0x33,
|
TOP_PATCHLIST_21 = 0x34,
|
TOP_PATCHLIST_22 = 0x35,
|
TOP_PATCHLIST_23 = 0x36,
|
TOP_PATCHLIST_24 = 0x37,
|
TOP_PATCHLIST_25 = 0x38,
|
TOP_PATCHLIST_26 = 0x39,
|
TOP_PATCHLIST_27 = 0x3A,
|
TOP_PATCHLIST_28 = 0x3B,
|
TOP_PATCHLIST_29 = 0x3C,
|
TOP_PATCHLIST_30 = 0x3D,
|
TOP_PATCHLIST_31 = 0x3E,
|
TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_SHADER_TYPE
|
//////////////////////////////////////////////////////////////////////////
|
enum SWR_SHADER_TYPE
|
{
|
SHADER_VERTEX,
|
SHADER_GEOMETRY,
|
SHADER_DOMAIN,
|
SHADER_HULL,
|
SHADER_PIXEL,
|
SHADER_COMPUTE,
|
|
NUM_SHADER_TYPES,
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_RENDERTARGET_ATTACHMENT
|
/// @todo Its not clear what an "attachment" means. Its not common term.
|
//////////////////////////////////////////////////////////////////////////
|
enum SWR_RENDERTARGET_ATTACHMENT
|
{
|
SWR_ATTACHMENT_COLOR0,
|
SWR_ATTACHMENT_COLOR1,
|
SWR_ATTACHMENT_COLOR2,
|
SWR_ATTACHMENT_COLOR3,
|
SWR_ATTACHMENT_COLOR4,
|
SWR_ATTACHMENT_COLOR5,
|
SWR_ATTACHMENT_COLOR6,
|
SWR_ATTACHMENT_COLOR7,
|
SWR_ATTACHMENT_DEPTH,
|
SWR_ATTACHMENT_STENCIL,
|
|
SWR_NUM_ATTACHMENTS
|
};
|
|
#define SWR_NUM_RENDERTARGETS 8
|
|
#define SWR_ATTACHMENT_COLOR0_BIT 0x001
|
#define SWR_ATTACHMENT_COLOR1_BIT 0x002
|
#define SWR_ATTACHMENT_COLOR2_BIT 0x004
|
#define SWR_ATTACHMENT_COLOR3_BIT 0x008
|
#define SWR_ATTACHMENT_COLOR4_BIT 0x010
|
#define SWR_ATTACHMENT_COLOR5_BIT 0x020
|
#define SWR_ATTACHMENT_COLOR6_BIT 0x040
|
#define SWR_ATTACHMENT_COLOR7_BIT 0x080
|
#define SWR_ATTACHMENT_DEPTH_BIT 0x100
|
#define SWR_ATTACHMENT_STENCIL_BIT 0x200
|
#define SWR_ATTACHMENT_MASK_ALL 0x3ff
|
#define SWR_ATTACHMENT_MASK_COLOR 0x0ff
|
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief SWR Inner Tessellation factor ID
|
/// See above GetTessFactorOutputPosition code for documentation
|
enum SWR_INNER_TESSFACTOR_ID
|
{
|
SWR_QUAD_U_TRI_INSIDE,
|
SWR_QUAD_V_INSIDE,
|
|
SWR_NUM_INNER_TESS_FACTORS,
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief SWR Outer Tessellation factor ID
|
/// See above GetTessFactorOutputPosition code for documentation
|
enum SWR_OUTER_TESSFACTOR_ID
|
{
|
SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
|
SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
|
SWR_QUAD_U_EQ1_TRI_W,
|
SWR_QUAD_V_EQ1,
|
|
SWR_NUM_OUTER_TESS_FACTORS,
|
};
|
|
|
/////////////////////////////////////////////////////////////////////////
|
/// simdvertex
|
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
|
/// Contains space for position, SGV, and 32 generic attributes
|
/////////////////////////////////////////////////////////////////////////
|
enum SWR_VTX_SLOTS
|
{
|
VERTEX_SGV_SLOT = 0,
|
VERTEX_SGV_RTAI_COMP = 0,
|
VERTEX_SGV_VAI_COMP = 1,
|
VERTEX_SGV_POINT_SIZE_COMP = 2,
|
VERTEX_POSITION_SLOT = 1,
|
VERTEX_POSITION_END_SLOT = 1,
|
VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
|
VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
|
VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT),
|
VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT),
|
SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT)
|
};
|
|
// SoAoSoA
|
struct simdvertex
|
{
|
simdvector attrib[SWR_VTX_NUM_SLOTS];
|
};
|
|
#if ENABLE_AVX512_SIMD16
|
struct simd16vertex
|
{
|
simd16vector attrib[SWR_VTX_NUM_SLOTS];
|
};
|
|
#endif
|
|
template<typename SIMD_T>
|
struct SIMDVERTEX_T
|
{
|
typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_VS_CONTEXT
|
/// @brief Input to vertex shader
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_VS_CONTEXT
|
{
|
simdvertex* pVin; // IN: SIMD input vertex data store
|
simdvertex* pVout; // OUT: SIMD output vertex data store
|
|
uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
|
simdscalari VertexID; // IN: Vertex ID
|
simdscalari mask; // IN: Active mask for shader
|
#if USE_SIMD16_FRONTEND
|
uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
|
#if USE_SIMD16_VS
|
simd16scalari mask16; // IN: Active mask for shader (16-wide)
|
simd16scalari VertexID16; // IN: Vertex ID (16-wide)
|
#endif
|
#endif
|
};
|
|
/////////////////////////////////////////////////////////////////////////
|
/// ScalarCPoint
|
/// @brief defines a control point element as passed from the output
|
/// of the hull shader to the input of the domain shader
|
/////////////////////////////////////////////////////////////////////////
|
struct ScalarAttrib
|
{
|
float x;
|
float y;
|
float z;
|
float w;
|
};
|
|
struct ScalarCPoint
|
{
|
ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_TESSELLATION_FACTORS
|
/// @brief Tessellation factors structure (non-vector)
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_TESSELLATION_FACTORS
|
{
|
float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
|
float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
|
};
|
|
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
|
struct ScalarPatch
|
{
|
SWR_TESSELLATION_FACTORS tessFactors;
|
ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
|
ScalarCPoint patchData;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_HS_CONTEXT
|
/// @brief Input to hull shader
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_HS_CONTEXT
|
{
|
simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
|
simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
|
simdscalari mask; // IN: Active mask for shader
|
ScalarPatch* pCPout; // OUT: Output control point patch
|
// SIMD-sized-array of SCALAR patches
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_DS_CONTEXT
|
/// @brief Input to domain shader
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_DS_CONTEXT
|
{
|
uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
|
uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
|
uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
|
uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
|
ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
|
simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
|
simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
|
simdscalari mask; // IN: Active mask for shader
|
simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_GS_CONTEXT
|
/// @brief Input to geometry shader.
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_GS_CONTEXT
|
{
|
simdvector* pVerts; // IN: input primitive data for SIMD prims
|
uint32_t inputVertStride; // IN: input vertex stride, in attributes
|
simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
|
uint32_t InstanceID; // IN: input instance ID
|
simdscalari mask; // IN: Active mask for shader
|
uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
|
};
|
|
struct PixelPositions
|
{
|
simdscalar UL;
|
simdscalar center;
|
simdscalar sample;
|
simdscalar centroid;
|
};
|
|
#define SWR_MAX_NUM_MULTISAMPLES 16
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_PS_CONTEXT
|
/// @brief Input to pixel shader.
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_PS_CONTEXT
|
{
|
PixelPositions vX; // IN: x location(s) of pixels
|
PixelPositions vY; // IN: x location(s) of pixels
|
simdscalar vZ; // INOUT: z location of pixels
|
simdscalari activeMask; // OUT: mask for kill
|
simdscalar inputMask; // IN: input coverage mask for all samples
|
simdscalari oMask; // OUT: mask for output coverage
|
|
PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
|
PixelPositions vJ;
|
PixelPositions vOneOverW; // IN: 1/w
|
|
const float* pAttribs; // IN: pointer to attribute barycentric coefficients
|
const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
|
const float* pRecipW; // IN: pointer to 1/w coord for each vertex
|
const float *I; // IN: Barycentric A, B, and C coefs used to compute I
|
const float *J; // IN: Barycentric A, B, and C coefs used to compute J
|
float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
|
const float* pSamplePosX; // IN: array of sample positions
|
const float* pSamplePosY; // IN: array of sample positions
|
simdvector shaded[SWR_NUM_RENDERTARGETS];
|
// OUT: result color per rendertarget
|
|
uint32_t frontFace; // IN: front- 1, back- 0
|
uint32_t sampleIndex; // IN: sampleIndex
|
uint32_t renderTargetArrayIndex; // IN: render target array index from GS
|
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
|
|
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_CS_CONTEXT
|
/// @brief Input to compute shader.
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_CS_CONTEXT
|
{
|
// The ThreadGroupId is the current thread group index relative
|
// to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
|
// and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
|
|
// Compute shader accepts the following system values.
|
// o ThreadId - Current thread id relative to all other threads in dispatch.
|
// o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
|
// o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
|
// o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
|
//
|
// All of these system values can be computed in the shader. They will be
|
// derived from the current tile counter. The tile counter is an atomic counter that
|
// resides in the draw context and is initialized to the product of the dispatch dims.
|
//
|
// tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
|
//
|
// Each CPU worker thread will atomically decrement this counter and passes the current
|
// count into the shader. When the count reaches 0 then all thread groups in the
|
// dispatch call have been completed.
|
|
uint32_t tileCounter; // The tile counter value for this thread group.
|
|
// Dispatch dimensions used by shader to compute system values from the tile counter.
|
uint32_t dispatchDims[3];
|
|
uint8_t* pTGSM; // Thread Group Shared Memory pointer.
|
|
uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
|
|
uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is responsible
|
// for subdividing scratch space per instance/simd
|
|
uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
|
};
|
|
// enums
|
enum SWR_TILE_MODE
|
{
|
SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
|
SWR_TILE_MODE_WMAJOR, // W major tiling
|
SWR_TILE_MODE_XMAJOR, // X major tiling
|
SWR_TILE_MODE_YMAJOR, // Y major tiling
|
SWR_TILE_SWRZ, // SWR-Z tiling
|
|
SWR_TILE_MODE_COUNT
|
};
|
|
enum SWR_SURFACE_TYPE
|
{
|
SURFACE_1D = 0,
|
SURFACE_2D = 1,
|
SURFACE_3D = 2,
|
SURFACE_CUBE = 3,
|
SURFACE_BUFFER = 4,
|
SURFACE_STRUCTURED_BUFFER = 5,
|
SURFACE_NULL = 7
|
};
|
|
enum SWR_ZFUNCTION
|
{
|
ZFUNC_ALWAYS,
|
ZFUNC_NEVER,
|
ZFUNC_LT,
|
ZFUNC_EQ,
|
ZFUNC_LE,
|
ZFUNC_GT,
|
ZFUNC_NE,
|
ZFUNC_GE,
|
NUM_ZFUNC
|
};
|
|
enum SWR_STENCILOP
|
{
|
STENCILOP_KEEP,
|
STENCILOP_ZERO,
|
STENCILOP_REPLACE,
|
STENCILOP_INCRSAT,
|
STENCILOP_DECRSAT,
|
STENCILOP_INCR,
|
STENCILOP_DECR,
|
STENCILOP_INVERT
|
};
|
|
enum SWR_BLEND_FACTOR
|
{
|
BLENDFACTOR_ONE,
|
BLENDFACTOR_SRC_COLOR,
|
BLENDFACTOR_SRC_ALPHA,
|
BLENDFACTOR_DST_ALPHA,
|
BLENDFACTOR_DST_COLOR,
|
BLENDFACTOR_SRC_ALPHA_SATURATE,
|
BLENDFACTOR_CONST_COLOR,
|
BLENDFACTOR_CONST_ALPHA,
|
BLENDFACTOR_SRC1_COLOR,
|
BLENDFACTOR_SRC1_ALPHA,
|
BLENDFACTOR_ZERO,
|
BLENDFACTOR_INV_SRC_COLOR,
|
BLENDFACTOR_INV_SRC_ALPHA,
|
BLENDFACTOR_INV_DST_ALPHA,
|
BLENDFACTOR_INV_DST_COLOR,
|
BLENDFACTOR_INV_CONST_COLOR,
|
BLENDFACTOR_INV_CONST_ALPHA,
|
BLENDFACTOR_INV_SRC1_COLOR,
|
BLENDFACTOR_INV_SRC1_ALPHA
|
};
|
|
enum SWR_BLEND_OP
|
{
|
BLENDOP_ADD,
|
BLENDOP_SUBTRACT,
|
BLENDOP_REVSUBTRACT,
|
BLENDOP_MIN,
|
BLENDOP_MAX,
|
};
|
|
enum SWR_LOGIC_OP
|
{
|
LOGICOP_CLEAR,
|
LOGICOP_NOR,
|
LOGICOP_AND_INVERTED,
|
LOGICOP_COPY_INVERTED,
|
LOGICOP_AND_REVERSE,
|
LOGICOP_INVERT,
|
LOGICOP_XOR,
|
LOGICOP_NAND,
|
LOGICOP_AND,
|
LOGICOP_EQUIV,
|
LOGICOP_NOOP,
|
LOGICOP_OR_INVERTED,
|
LOGICOP_COPY,
|
LOGICOP_OR_REVERSE,
|
LOGICOP_OR,
|
LOGICOP_SET,
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_AUX_MODE
|
/// @brief Specifies how the auxiliary buffer is used by the driver.
|
//////////////////////////////////////////////////////////////////////////
|
enum SWR_AUX_MODE
|
{
|
AUX_MODE_NONE,
|
AUX_MODE_COLOR,
|
AUX_MODE_UAV,
|
AUX_MODE_DEPTH,
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_SURFACE_STATE
|
//////////////////////////////////////////////////////////////////////////
|
struct SWR_SURFACE_STATE
|
{
|
gfxptr_t xpBaseAddress;
|
SWR_SURFACE_TYPE type; // @llvm_enum
|
SWR_FORMAT format; // @llvm_enum
|
uint32_t width;
|
uint32_t height;
|
uint32_t depth;
|
uint32_t numSamples;
|
uint32_t samplePattern;
|
uint32_t pitch;
|
uint32_t qpitch;
|
uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
|
uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
|
float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
|
uint32_t lod; // for render targets, the lod being rendered to
|
uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
|
SWR_TILE_MODE tileMode; // @llvm_enum
|
uint32_t halign;
|
uint32_t valign;
|
uint32_t xOffset;
|
uint32_t yOffset;
|
|
uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
|
|
gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
|
SWR_AUX_MODE auxMode; // @llvm_enum
|
|
|
bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
|
};
|
|
// vertex fetch state
|
// WARNING- any changes to this struct need to be reflected
|
// in the fetch shader jit
|
struct SWR_VERTEX_BUFFER_STATE
|
{
|
uint32_t index;
|
uint32_t pitch;
|
const uint8_t *pData;
|
uint32_t size;
|
uint32_t numaNode;
|
uint32_t minVertex; // min vertex (for bounds checking)
|
uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
|
uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
|
};
|
|
struct SWR_INDEX_BUFFER_STATE
|
{
|
// Format type for indices (e.g. UINT16, UINT32, etc.)
|
SWR_FORMAT format; // @llvm_enum
|
const void *pIndices;
|
uint32_t size;
|
};
|
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_FETCH_CONTEXT
|
/// @brief Input to fetch shader.
|
/// @note WARNING - Changes to this struct need to be reflected in the
|
/// fetch shader jit.
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_FETCH_CONTEXT
|
{
|
const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
|
const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
|
const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
|
uint32_t CurInstance; // IN: current instance
|
uint32_t BaseVertex; // IN: base vertex
|
uint32_t StartVertex; // IN: start vertex
|
uint32_t StartInstance; // IN: start instance
|
simdscalari VertexID; // OUT: vector of vertex IDs
|
simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
|
#if USE_SIMD16_SHADERS
|
// simd16scalari VertexID; // OUT: vector of vertex IDs
|
// simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
|
simdscalari VertexID2; // OUT: vector of vertex IDs
|
simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
|
#endif
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_STATS
|
///
|
/// @brief All statistics generated by SWR go here. These are public
|
/// to driver.
|
/////////////////////////////////////////////////////////////////////////
|
OSALIGNLINE(struct) SWR_STATS
|
{
|
// Occlusion Query
|
uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
|
|
// Pipeline Stats
|
uint64_t PsInvocations; // Number of Pixel Shader invocations
|
uint64_t CsInvocations; // Number of Compute Shader invocations
|
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_STATS
|
///
|
/// @brief All statistics generated by FE.
|
/////////////////////////////////////////////////////////////////////////
|
OSALIGNLINE(struct) SWR_STATS_FE
|
{
|
uint64_t IaVertices; // Number of Fetch Shader vertices
|
uint64_t IaPrimitives; // Number of PA primitives.
|
uint64_t VsInvocations; // Number of Vertex Shader invocations
|
uint64_t HsInvocations; // Number of Hull Shader invocations
|
uint64_t DsInvocations; // Number of Domain Shader invocations
|
uint64_t GsInvocations; // Number of Geometry Shader invocations
|
uint64_t GsPrimitives; // Number of prims GS outputs.
|
uint64_t CInvocations; // Number of clipper invocations
|
uint64_t CPrimitives; // Number of clipper primitives.
|
|
// Streamout Stats
|
uint64_t SoPrimStorageNeeded[4];
|
uint64_t SoNumPrimsWritten[4];
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// STREAMOUT_BUFFERS
|
/////////////////////////////////////////////////////////////////////////
|
|
#define MAX_SO_STREAMS 4
|
#define MAX_SO_BUFFERS 4
|
#define MAX_ATTRIBUTES 32
|
|
struct SWR_STREAMOUT_BUFFER
|
{
|
bool enable;
|
bool soWriteEnable;
|
|
// Pointers to streamout buffers.
|
uint32_t* pBuffer;
|
|
// Size of buffer in dwords.
|
uint32_t bufferSize;
|
|
// Vertex pitch of buffer in dwords.
|
uint32_t pitch;
|
|
// Offset into buffer in dwords. SOS will increment this offset.
|
uint32_t streamOffset;
|
|
// Offset to the SO write offset. If not null then we update offset here.
|
uint32_t* pWriteOffset;
|
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// STREAMOUT_STATE
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_STREAMOUT_STATE
|
{
|
// This disables stream output.
|
bool soEnable;
|
|
// which streams are enabled for streamout
|
bool streamEnable[MAX_SO_STREAMS];
|
|
// If set then do not send any streams to the rasterizer.
|
bool rasterizerDisable;
|
|
// Specifies which stream to send to the rasterizer.
|
uint32_t streamToRasterizer;
|
|
// The stream masks specify which attributes are sent to which streams.
|
// These masks help the FE to setup the pPrimData buffer that is passed
|
// the Stream Output Shader (SOS) function.
|
uint32_t streamMasks[MAX_SO_STREAMS];
|
|
// Number of attributes, including position, per vertex that are streamed out.
|
// This should match number of bits in stream mask.
|
uint32_t streamNumEntries[MAX_SO_STREAMS];
|
|
// Offset to the start of the attributes of the input vertices, in simdvector units
|
uint32_t vertexAttribOffset[MAX_SO_STREAMS];
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// STREAMOUT_CONTEXT - Passed to SOS
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_STREAMOUT_CONTEXT
|
{
|
uint32_t* pPrimData;
|
SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
|
|
// Num prims written for this stream
|
uint32_t numPrimsWritten;
|
|
// Num prims that should have been written if there were no overflow.
|
uint32_t numPrimStorageNeeded;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_GS_STATE - Geometry shader state
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_GS_STATE
|
{
|
bool gsEnable;
|
|
// Number of input attributes per vertex. Used by the frontend to
|
// optimize assembling primitives for GS
|
uint32_t numInputAttribs;
|
|
// Stride of incoming verts in attributes
|
uint32_t inputVertStride;
|
|
// Output topology - can be point, tristrip, or linestrip
|
PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
|
|
// Maximum number of verts that can be emitted by a single instance of the GS
|
uint32_t maxNumVerts;
|
|
// Instance count
|
uint32_t instanceCount;
|
|
// If true, geometry shader emits a single stream, with separate cut buffer.
|
// If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
|
// to map vertices to streams
|
bool isSingleStream;
|
|
// When single stream is enabled, singleStreamID dictates which stream is being output.
|
// field ignored if isSingleStream is false
|
uint32_t singleStreamID;
|
|
// Total amount of memory to allocate for one instance of the shader output in bytes
|
uint32_t allocationSize;
|
|
// Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
|
uint32_t vertexAttribOffset;
|
|
// Offset to the attributes as stored by the preceding shader stage.
|
uint32_t srcVertexAttribOffset;
|
|
// Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
|
// the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
|
uint32_t controlDataSize;
|
|
// Offset to the control data section, in bytes
|
uint32_t controlDataOffset;
|
|
// Total size of an output vertex, in simdvector units
|
uint32_t outputVertexSize;
|
|
// Offset to the start of the vertex section, in bytes
|
uint32_t outputVertexOffset;
|
|
// Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
|
// expected to store the final vertex count in the first dword of the gs output stream.
|
uint32_t staticVertexCount;
|
};
|
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
|
/////////////////////////////////////////////////////////////////////////
|
enum SWR_TS_OUTPUT_TOPOLOGY
|
{
|
SWR_TS_OUTPUT_POINT,
|
SWR_TS_OUTPUT_LINE,
|
SWR_TS_OUTPUT_TRI_CW,
|
SWR_TS_OUTPUT_TRI_CCW,
|
|
SWR_TS_OUTPUT_TOPOLOGY_COUNT
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
|
/////////////////////////////////////////////////////////////////////////
|
enum SWR_TS_PARTITIONING
|
{
|
SWR_TS_INTEGER,
|
SWR_TS_ODD_FRACTIONAL,
|
SWR_TS_EVEN_FRACTIONAL,
|
|
SWR_TS_PARTITIONING_COUNT
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_TS_DOMAIN - Defines Tessellation Domain
|
/////////////////////////////////////////////////////////////////////////
|
enum SWR_TS_DOMAIN
|
{
|
SWR_TS_QUAD,
|
SWR_TS_TRI,
|
SWR_TS_ISOLINE,
|
|
SWR_TS_DOMAIN_COUNT
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_TS_STATE - Tessellation state
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_TS_STATE
|
{
|
bool tsEnable;
|
SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
|
SWR_TS_PARTITIONING partitioning; // @llvm_enum
|
SWR_TS_DOMAIN domain; // @llvm_enum
|
|
PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
|
|
uint32_t numHsInputAttribs;
|
uint32_t numHsOutputAttribs;
|
uint32_t numDsOutputAttribs;
|
uint32_t dsAllocationSize;
|
uint32_t dsOutVtxAttribOffset;
|
|
// Offset to the start of the attributes of the input vertices, in simdvector units
|
uint32_t vertexAttribOffset;
|
};
|
|
// output merger state
|
struct SWR_RENDER_TARGET_BLEND_STATE
|
{
|
uint8_t writeDisableRed : 1;
|
uint8_t writeDisableGreen : 1;
|
uint8_t writeDisableBlue : 1;
|
uint8_t writeDisableAlpha : 1;
|
};
|
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
|
|
enum SWR_MULTISAMPLE_COUNT
|
{
|
SWR_MULTISAMPLE_1X = 0,
|
SWR_MULTISAMPLE_2X,
|
SWR_MULTISAMPLE_4X,
|
SWR_MULTISAMPLE_8X,
|
SWR_MULTISAMPLE_16X,
|
SWR_MULTISAMPLE_TYPE_COUNT
|
};
|
|
INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
|
{
|
static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
|
assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
|
return sampleCountLUT[sampleCount];
|
} // @llvm_func_end
|
|
struct SWR_BLEND_STATE
|
{
|
// constant blend factor color in RGBA float
|
float constantColor[4];
|
|
// alpha test reference value in unorm8 or float32
|
uint32_t alphaTestReference;
|
uint32_t sampleMask;
|
// all RT's have the same sample count
|
///@todo move this to Output Merger state when we refactor
|
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
|
|
SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
|
};
|
static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
|
|
//////////////////////////////////////////////////////////////////////////
|
/// FUNCTION POINTERS FOR SHADERS
|
|
#if USE_SIMD16_SHADERS
|
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
|
#else
|
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
#endif
|
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
|
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
|
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
|
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
|
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
|
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
|
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
|
simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
|
uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
|
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
/// FRONTEND_STATE
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_FRONTEND_STATE
|
{
|
// skip clip test, perspective divide, and viewport transform
|
// intended for verts in screen space
|
bool vpTransformDisable;
|
bool bEnableCutIndex;
|
union
|
{
|
struct
|
{
|
uint32_t triFan : 2;
|
uint32_t lineStripList : 1;
|
uint32_t triStripList : 2;
|
};
|
uint32_t bits;
|
} provokingVertex;
|
uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
|
|
// Size of a vertex in simdvector units. Should be sized to the
|
// maximum of the input/output of the vertex shader.
|
uint32_t vsVertexSize;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// VIEWPORT_MATRIX
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_VIEWPORT_MATRIX
|
{
|
float m00;
|
float m11;
|
float m22;
|
float m30;
|
float m31;
|
float m32;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// VIEWPORT_MATRIXES
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_VIEWPORT_MATRICES
|
{
|
float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
|
float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
|
float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
|
float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
|
float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
|
float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_VIEWPORT
|
/////////////////////////////////////////////////////////////////////////
|
struct SWR_VIEWPORT
|
{
|
float x;
|
float y;
|
float width;
|
float height;
|
float minZ;
|
float maxZ;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_CULLMODE
|
//////////////////////////////////////////////////////////////////////////
|
enum SWR_CULLMODE
|
{
|
SWR_CULLMODE_BOTH,
|
SWR_CULLMODE_NONE,
|
SWR_CULLMODE_FRONT,
|
SWR_CULLMODE_BACK
|
};
|
|
enum SWR_FILLMODE
|
{
|
SWR_FILLMODE_POINT,
|
SWR_FILLMODE_WIREFRAME,
|
SWR_FILLMODE_SOLID
|
};
|
|
enum SWR_FRONTWINDING
|
{
|
SWR_FRONTWINDING_CW,
|
SWR_FRONTWINDING_CCW
|
};
|
|
|
enum SWR_PIXEL_LOCATION
|
{
|
SWR_PIXEL_LOCATION_CENTER,
|
SWR_PIXEL_LOCATION_UL,
|
};
|
|
// fixed point screen space sample locations within a pixel
|
struct SWR_MULTISAMPLE_POS
|
{
|
public:
|
INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
|
INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
|
INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
|
INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
|
INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
|
INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
|
INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
|
INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
|
typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
|
INLINE sampleArrayT X() const { return _x; }; // @llvm_func
|
INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
|
INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
|
INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
|
INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
|
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
|
INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
|
INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
|
|
INLINE void PrecalcSampleData(int numSamples); //@llvm_func
|
|
private:
|
template <typename MaskT>
|
INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
|
INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
|
|
// scalar sample values
|
uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
|
uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
|
float _x[SWR_MAX_NUM_MULTISAMPLES];
|
float _y[SWR_MAX_NUM_MULTISAMPLES];
|
|
// precalc'd / vectorized samples
|
__m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
|
__m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
|
simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
|
simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
|
__m128i tileSampleOffsetsX;
|
__m128i tileSampleOffsetsY;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// SWR_RASTSTATE
|
//////////////////////////////////////////////////////////////////////////
|
struct SWR_RASTSTATE
|
{
|
uint32_t cullMode : 2;
|
uint32_t fillMode : 2;
|
uint32_t frontWinding : 1;
|
uint32_t scissorEnable : 1;
|
uint32_t depthClipEnable : 1;
|
uint32_t clipHalfZ : 1;
|
uint32_t pointParam : 1;
|
uint32_t pointSpriteEnable : 1;
|
uint32_t pointSpriteTopOrigin : 1;
|
uint32_t forcedSampleCount : 1;
|
uint32_t pixelOffset : 1;
|
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
|
uint32_t conservativeRast : 1;
|
|
float pointSize;
|
float lineWidth;
|
|
float depthBias;
|
float slopeScaledDepthBias;
|
float depthBiasClamp;
|
SWR_FORMAT depthFormat; // @llvm_enum
|
|
// sample count the rasterizer is running at
|
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
|
uint32_t pixelLocation; // UL or Center
|
SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
|
bool bIsCenterPattern; // @llvm_enum
|
};
|
|
|
enum SWR_CONSTANT_SOURCE
|
{
|
SWR_CONSTANT_SOURCE_CONST_0000,
|
SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
|
SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
|
SWR_CONSTANT_SOURCE_PRIM_ID
|
};
|
|
struct SWR_ATTRIB_SWIZZLE
|
{
|
uint16_t sourceAttrib : 5; // source attribute
|
uint16_t constantSource : 2; // constant source to apply
|
uint16_t componentOverrideMask : 4; // override component with constant source
|
};
|
|
// backend state
|
struct SWR_BACKEND_STATE
|
{
|
uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
|
uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
|
|
uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
|
uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
|
|
bool swizzleEnable; // when enabled, core will parse the swizzle map when
|
// setting up attributes for the backend, otherwise
|
// all attributes up to numAttributes will be sent
|
SWR_ATTRIB_SWIZZLE swizzleMap[32];
|
|
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
|
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
|
|
// Offset to the start of the attributes of the input vertices, in simdvector units
|
uint32_t vertexAttribOffset;
|
|
// User clip/cull distance enables
|
uint8_t cullDistanceMask;
|
uint8_t clipDistanceMask;
|
|
// Offset to clip/cull attrib section of the vertex, in simdvector units
|
uint32_t vertexClipCullOffset;
|
};
|
|
|
union SWR_DEPTH_STENCIL_STATE
|
{
|
struct
|
{
|
// dword 0
|
uint32_t depthWriteEnable : 1;
|
uint32_t depthTestEnable : 1;
|
uint32_t stencilWriteEnable : 1;
|
uint32_t stencilTestEnable : 1;
|
uint32_t doubleSidedStencilTestEnable : 1;
|
|
uint32_t depthTestFunc : 3;
|
uint32_t stencilTestFunc : 3;
|
|
uint32_t backfaceStencilPassDepthPassOp : 3;
|
uint32_t backfaceStencilPassDepthFailOp : 3;
|
uint32_t backfaceStencilFailOp : 3;
|
uint32_t backfaceStencilTestFunc : 3;
|
uint32_t stencilPassDepthPassOp : 3;
|
uint32_t stencilPassDepthFailOp : 3;
|
uint32_t stencilFailOp : 3;
|
|
// dword 1
|
uint8_t backfaceStencilWriteMask;
|
uint8_t backfaceStencilTestMask;
|
uint8_t stencilWriteMask;
|
uint8_t stencilTestMask;
|
|
// dword 2
|
uint8_t backfaceStencilRefValue;
|
uint8_t stencilRefValue;
|
};
|
uint32_t value[3];
|
};
|
|
enum SWR_SHADING_RATE
|
{
|
SWR_SHADING_RATE_PIXEL,
|
SWR_SHADING_RATE_SAMPLE,
|
SWR_SHADING_RATE_COUNT,
|
};
|
|
enum SWR_INPUT_COVERAGE
|
{
|
SWR_INPUT_COVERAGE_NONE,
|
SWR_INPUT_COVERAGE_NORMAL,
|
SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
|
SWR_INPUT_COVERAGE_COUNT,
|
};
|
|
enum SWR_PS_POSITION_OFFSET
|
{
|
SWR_PS_POSITION_SAMPLE_NONE,
|
SWR_PS_POSITION_SAMPLE_OFFSET,
|
SWR_PS_POSITION_CENTROID_OFFSET,
|
SWR_PS_POSITION_OFFSET_COUNT,
|
};
|
|
enum SWR_BARYCENTRICS_MASK
|
{
|
SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
|
SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
|
SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
|
};
|
|
// pixel shader state
|
struct SWR_PS_STATE
|
{
|
// dword 0-1
|
PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
|
|
// dword 2
|
uint32_t killsPixel : 1; // pixel shader can kill pixels
|
uint32_t inputCoverage : 2; // ps uses input coverage
|
uint32_t writesODepth : 1; // pixel shader writes to depth
|
uint32_t usesSourceDepth : 1; // pixel shader reads depth
|
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
|
uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
|
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
|
uint32_t usesUAV : 1; // pixel shader accesses UAV
|
uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
|
|
uint8_t renderTargetMask; // Mask of render targets written
|
};
|
|
// depth bounds state
|
struct SWR_DEPTH_BOUNDS_STATE
|
{
|
bool depthBoundsTestEnable;
|
float depthBoundsTestMinValue;
|
float depthBoundsTestMaxValue;
|
};
|