~lzh/A133.git

/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file fifo.hpp
*
* @brief Definitions for our fifos used for thread communication.
*
******************************************************************************/
#pragma once
 
 
#include "common/os.h"
#include "arena.h"
 
#include <vector>
#include <cassert>
 
template<class T>
struct QUEUE
{
    OSALIGNLINE(volatile uint32_t) mLock{ 0 };
    OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
    std::vector<T*> mBlocks;
    T* mCurBlock{ nullptr };
    uint32_t mHead{ 0 };
    uint32_t mTail{ 0 };
    uint32_t mCurBlockIdx{ 0 };
 
    // power of 2
    static const uint32_t mBlockSizeShift = 6;
    static const uint32_t mBlockSize = 1 << mBlockSizeShift;
 
    template <typename ArenaT>
    void clear(ArenaT& arena)
    {
        mHead = 0;
        mTail = 0;
        mBlocks.clear();
        T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
        mBlocks.push_back(pNewBlock);
        mCurBlock = pNewBlock;
        mCurBlockIdx = 0;
        mNumEntries = 0;
        mLock = 0;
    }
 
    uint32_t getNumQueued()
    {
        return mNumEntries;
    }
 
    bool tryLock()
    {
        if (mLock)
        {
            return false;
        }
 
        // try to lock the FIFO
        long initial = InterlockedCompareExchange(&mLock, 1, 0);
        return (initial == 0);
    }
 
    void unlock()
    {
        mLock = 0;
    }
 
    T* peek()
    {
        if (mNumEntries == 0)
        {
            return nullptr;
        }
        uint32_t block = mHead >> mBlockSizeShift;
        return &mBlocks[block][mHead & (mBlockSize-1)];
    }
 
    void dequeue_noinc()
    {
        mHead ++;
        mNumEntries --;
    }
 
    template <typename ArenaT>
    bool enqueue_try_nosync(ArenaT& arena, const T* entry)
    {
        const float* pSrc = (const float*)entry;
        float* pDst = (float*)&mCurBlock[mTail];
 
        auto lambda = [&](int32_t i)
        {
            __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
            _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
        };
 
        const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
        static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
            "FIFO element size should be multiple of SIMD width.");
 
        UnrollerL<0, numSimdLines, 1>::step(lambda);
 
        mTail ++;
        if (mTail == mBlockSize)
        {
            if (++mCurBlockIdx < mBlocks.size())
            {
                mCurBlock = mBlocks[mCurBlockIdx];
            }
            else
            {
                T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
                SWR_ASSERT(newBlock);
 
                mBlocks.push_back(newBlock);
                mCurBlock = newBlock;
            }
 
            mTail = 0;
        }
 
        mNumEntries ++;
        return true;
    }
 
    void destroy()
    {
    }
 
};