/****************************************************************************
|
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
*
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
* copy of this software and associated documentation files (the "Software"),
|
* to deal in the Software without restriction, including without limitation
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
* and/or sell copies of the Software, and to permit persons to whom the
|
* Software is furnished to do so, subject to the following conditions:
|
*
|
* The above copyright notice and this permission notice (including the next
|
* paragraph) shall be included in all copies or substantial portions of the
|
* Software.
|
*
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
* IN THE SOFTWARE.
|
*
|
* @file tilemgr.h
|
*
|
* @brief Definitions for Macro Tile Manager which provides the facilities
|
* for threads to work on an macro tile.
|
*
|
******************************************************************************/
|
#pragma once
|
|
#include <set>
|
#include <unordered_map>
|
#include "common/formats.h"
|
#include "fifo.hpp"
|
#include "context.h"
|
#include "format_traits.h"
|
|
//////////////////////////////////////////////////////////////////////////
|
/// MacroTile - work queue for a tile.
|
//////////////////////////////////////////////////////////////////////////
|
struct MacroTileQueue
|
{
|
MacroTileQueue() { }
|
~MacroTileQueue() { }
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Returns number of work items queued for this tile.
|
uint32_t getNumQueued()
|
{
|
return mFifo.getNumQueued();
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Attempt to lock the work fifo. If already locked then return false.
|
bool tryLock()
|
{
|
return mFifo.tryLock();
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Clear fifo and unlock it.
|
template <typename ArenaT>
|
void clear(ArenaT& arena)
|
{
|
mFifo.clear(arena);
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Peek at work sitting at the front of the fifo.
|
BE_WORK* peek()
|
{
|
return mFifo.peek();
|
}
|
|
template <typename ArenaT>
|
bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
|
{
|
return mFifo.enqueue_try_nosync(arena, entry);
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Move to next work item
|
void dequeue()
|
{
|
mFifo.dequeue_noinc();
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Destroy fifo
|
void destroy()
|
{
|
mFifo.destroy();
|
}
|
|
///@todo This will all be private.
|
uint32_t mWorkItemsFE = 0;
|
uint32_t mWorkItemsBE = 0;
|
uint32_t mId = 0;
|
|
private:
|
QUEUE<BE_WORK> mFifo;
|
};
|
|
//////////////////////////////////////////////////////////////////////////
|
/// MacroTileMgr - Manages macrotiles for a draw.
|
//////////////////////////////////////////////////////////////////////////
|
class MacroTileMgr
|
{
|
public:
|
MacroTileMgr(CachingArena& arena);
|
~MacroTileMgr()
|
{
|
for (auto &tile : mTiles)
|
{
|
tile.second.destroy();
|
}
|
}
|
|
INLINE void initialize()
|
{
|
mWorkItemsProduced = 0;
|
mWorkItemsConsumed = 0;
|
|
mDirtyTiles.clear();
|
}
|
|
INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
|
void markTileComplete(uint32_t id);
|
|
INLINE bool isWorkComplete()
|
{
|
return mWorkItemsProduced == mWorkItemsConsumed;
|
}
|
|
void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
|
|
static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
|
{
|
y = tileID & 0xffff;
|
x = (tileID >> 16) & 0xffff;
|
}
|
|
private:
|
CachingArena& mArena;
|
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
|
|
// Any tile that has work queued to it is a dirty tile.
|
std::vector<MacroTileQueue*> mDirtyTiles;
|
|
OSALIGNLINE(long) mWorkItemsProduced { 0 };
|
OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 };
|
};
|
|
typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
|
|
//////////////////////////////////////////////////////////////////////////
|
/// DispatchQueue - work queue for dispatch
|
//////////////////////////////////////////////////////////////////////////
|
class DispatchQueue
|
{
|
public:
|
DispatchQueue() {}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Setup the producer consumer counts.
|
void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
|
{
|
// The available and outstanding counts start with total tasks.
|
// At the start there are N tasks available and outstanding.
|
// When both the available and outstanding counts have reached 0 then all work has completed.
|
// When a worker starts on a threadgroup then it decrements the available count.
|
// When a worker completes a threadgroup then it decrements the outstanding count.
|
|
mTasksAvailable = totalTasks;
|
mTasksOutstanding = totalTasks;
|
|
mpTaskData = pTaskData;
|
mPfnDispatch = pfnDispatch;
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Returns number of tasks available for this dispatch.
|
uint32_t getNumQueued()
|
{
|
return (mTasksAvailable > 0) ? mTasksAvailable : 0;
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Atomically decrement the work available count. If the result
|
// is greater than 0 then we can on the associated thread group.
|
// Otherwise, there is no more work to do.
|
bool getWork(uint32_t& groupId)
|
{
|
long result = InterlockedDecrement(&mTasksAvailable);
|
|
if (result >= 0)
|
{
|
groupId = result;
|
return true;
|
}
|
|
return false;
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Atomically decrement the outstanding count. A worker is notifying
|
/// us that he just finished some work. Also, return true if we're
|
/// the last worker to complete this dispatch.
|
bool finishedWork()
|
{
|
long result = InterlockedDecrement(&mTasksOutstanding);
|
SWR_ASSERT(result >= 0, "Should never oversubscribe work");
|
|
return (result == 0) ? true : false;
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Work is complete once both the available/outstanding counts have reached 0.
|
bool isWorkComplete()
|
{
|
return ((mTasksAvailable <= 0) &&
|
(mTasksOutstanding <= 0));
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Return pointer to task data.
|
const void* GetTasksData()
|
{
|
return mpTaskData;
|
}
|
|
//////////////////////////////////////////////////////////////////////////
|
/// @brief Dispatches a unit of work
|
void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
|
{
|
SWR_ASSERT(mPfnDispatch != nullptr);
|
mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
|
}
|
|
void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.
|
PFN_DISPATCH mPfnDispatch{ nullptr }; // Function to call per dispatch
|
|
OSALIGNLINE(volatile long) mTasksAvailable{ 0 };
|
OSALIGNLINE(volatile long) mTasksOutstanding{ 0 };
|
};
|
|
|
enum HOTTILE_STATE
|
{
|
HOTTILE_INVALID, // tile is in unitialized state and should be loaded with surface contents before rendering
|
HOTTILE_CLEAR, // tile should be cleared
|
HOTTILE_DIRTY, // tile has been rendered to
|
HOTTILE_RESOLVED, // tile has been stored to memory
|
};
|
|
struct HOTTILE
|
{
|
uint8_t *pBuffer;
|
HOTTILE_STATE state;
|
DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
|
uint32_t numSamples;
|
uint32_t renderTargetArrayIndex; // current render target array index loaded
|
};
|
|
union HotTileSet
|
{
|
struct
|
{
|
HOTTILE Color[SWR_NUM_RENDERTARGETS];
|
HOTTILE Depth;
|
HOTTILE Stencil;
|
};
|
HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
|
};
|
|
class HotTileMgr
|
{
|
public:
|
HotTileMgr()
|
{
|
memset(mHotTiles, 0, sizeof(mHotTiles));
|
|
// cache hottile size
|
for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
|
{
|
mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
|
}
|
mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
|
mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
|
}
|
|
~HotTileMgr()
|
{
|
for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
|
{
|
for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
|
{
|
for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
|
{
|
FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
|
}
|
}
|
}
|
}
|
|
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
|
|
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
|
uint32_t renderTargetArrayIndex = 0);
|
|
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
|
|
static void ClearColorHotTile(const HOTTILE* pHotTile);
|
static void ClearDepthHotTile(const HOTTILE* pHotTile);
|
static void ClearStencilHotTile(const HOTTILE* pHotTile);
|
|
private:
|
HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
|
uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
|
|
void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
|
{
|
void* p = nullptr;
|
#if defined(_WIN32)
|
HANDLE hProcess = GetCurrentProcess();
|
p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
|
#else
|
p = AlignedMalloc(size, align);
|
#endif
|
|
return p;
|
}
|
|
void FreeHotTileMem(void* pBuffer)
|
{
|
if (pBuffer)
|
{
|
#if defined(_WIN32)
|
VirtualFree(pBuffer, 0, MEM_RELEASE);
|
#else
|
AlignedFree(pBuffer);
|
#endif
|
}
|
}
|
};
|