// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
|
//
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
// you may not use this file except in compliance with the License.
|
// You may obtain a copy of the License at
|
//
|
// http://www.apache.org/licenses/LICENSE-2.0
|
//
|
// Unless required by applicable law or agreed to in writing, software
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
// See the License for the specific language governing permissions and
|
// limitations under the License.
|
|
#include "Surface.hpp"
|
|
#include "Color.hpp"
|
#include "Context.hpp"
|
#include "ETC_Decoder.hpp"
|
#include "Renderer.hpp"
|
#include "Common/Half.hpp"
|
#include "Common/Memory.hpp"
|
#include "Common/CPUID.hpp"
|
#include "Common/Resource.hpp"
|
#include "Common/Debug.hpp"
|
#include "Reactor/Reactor.hpp"
|
|
#if defined(__i386__) || defined(__x86_64__)
|
#include <xmmintrin.h>
|
#include <emmintrin.h>
|
#endif
|
|
#undef min
|
#undef max
|
|
namespace sw
|
{
|
extern bool quadLayoutEnabled;
|
extern bool complementaryDepthBuffer;
|
extern TranscendentalPrecision logPrecision;
|
|
unsigned int *Surface::palette = 0;
|
unsigned int Surface::paletteID = 0;
|
|
void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
|
{
|
ASSERT((x >= -border) && (x < (width + border)));
|
ASSERT((y >= -border) && (y < (height + border)));
|
ASSERT((z >= 0) && (z < depth));
|
|
byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
|
|
for(int i = 0; i < samples; i++)
|
{
|
write(element, color);
|
element += sliceB;
|
}
|
}
|
|
void Surface::Buffer::write(int x, int y, const Color<float> &color)
|
{
|
ASSERT((x >= -border) && (x < (width + border)));
|
ASSERT((y >= -border) && (y < (height + border)));
|
|
byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
|
|
for(int i = 0; i < samples; i++)
|
{
|
write(element, color);
|
element += sliceB;
|
}
|
}
|
|
inline void Surface::Buffer::write(void *element, const Color<float> &color)
|
{
|
float r = color.r;
|
float g = color.g;
|
float b = color.b;
|
float a = color.a;
|
|
if(isSRGBformat(format))
|
{
|
r = linearToSRGB(r);
|
g = linearToSRGB(g);
|
b = linearToSRGB(b);
|
}
|
|
switch(format)
|
{
|
case FORMAT_A8:
|
*(unsigned char*)element = unorm<8>(a);
|
break;
|
case FORMAT_R8_SNORM:
|
*(char*)element = snorm<8>(r);
|
break;
|
case FORMAT_R8:
|
*(unsigned char*)element = unorm<8>(r);
|
break;
|
case FORMAT_R8I:
|
*(char*)element = scast<8>(r);
|
break;
|
case FORMAT_R8UI:
|
*(unsigned char*)element = ucast<8>(r);
|
break;
|
case FORMAT_R16I:
|
*(short*)element = scast<16>(r);
|
break;
|
case FORMAT_R16UI:
|
*(unsigned short*)element = ucast<16>(r);
|
break;
|
case FORMAT_R32I:
|
*(int*)element = static_cast<int>(r);
|
break;
|
case FORMAT_R32UI:
|
*(unsigned int*)element = static_cast<unsigned int>(r);
|
break;
|
case FORMAT_R3G3B2:
|
*(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
|
break;
|
case FORMAT_A8R3G3B2:
|
*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
|
break;
|
case FORMAT_X4R4G4B4:
|
*(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
|
break;
|
case FORMAT_A4R4G4B4:
|
*(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
|
break;
|
case FORMAT_R4G4B4A4:
|
*(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
|
break;
|
case FORMAT_R5G6B5:
|
*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
|
break;
|
case FORMAT_A1R5G5B5:
|
*(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
|
break;
|
case FORMAT_R5G5B5A1:
|
*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<5>(a) << 0);
|
break;
|
case FORMAT_X1R5G5B5:
|
*(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
|
break;
|
case FORMAT_A8R8G8B8:
|
*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
|
break;
|
case FORMAT_X8R8G8B8:
|
*(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
|
break;
|
case FORMAT_A8B8G8R8_SNORM:
|
*(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
|
(static_cast<unsigned int>(snorm<8>(b)) << 16) |
|
(static_cast<unsigned int>(snorm<8>(g)) << 8) |
|
(static_cast<unsigned int>(snorm<8>(r)) << 0);
|
break;
|
case FORMAT_A8B8G8R8:
|
case FORMAT_SRGB8_A8:
|
*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
|
break;
|
case FORMAT_A8B8G8R8I:
|
*(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
|
(static_cast<unsigned int>(scast<8>(b)) << 16) |
|
(static_cast<unsigned int>(scast<8>(g)) << 8) |
|
(static_cast<unsigned int>(scast<8>(r)) << 0);
|
break;
|
case FORMAT_A8B8G8R8UI:
|
*(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
|
break;
|
case FORMAT_X8B8G8R8_SNORM:
|
*(unsigned int*)element = 0x7F000000 |
|
(static_cast<unsigned int>(snorm<8>(b)) << 16) |
|
(static_cast<unsigned int>(snorm<8>(g)) << 8) |
|
(static_cast<unsigned int>(snorm<8>(r)) << 0);
|
break;
|
case FORMAT_X8B8G8R8:
|
case FORMAT_SRGB8_X8:
|
*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
|
break;
|
case FORMAT_X8B8G8R8I:
|
*(unsigned int*)element = 0x7F000000 |
|
(static_cast<unsigned int>(scast<8>(b)) << 16) |
|
(static_cast<unsigned int>(scast<8>(g)) << 8) |
|
(static_cast<unsigned int>(scast<8>(r)) << 0);
|
case FORMAT_X8B8G8R8UI:
|
*(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
|
break;
|
case FORMAT_A2R10G10B10:
|
*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
|
break;
|
case FORMAT_A2B10G10R10:
|
case FORMAT_A2B10G10R10UI:
|
*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
|
break;
|
case FORMAT_G8R8_SNORM:
|
*(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
|
(static_cast<unsigned short>(snorm<8>(r)) << 0);
|
break;
|
case FORMAT_G8R8:
|
*(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
|
break;
|
case FORMAT_G8R8I:
|
*(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
|
(static_cast<unsigned short>(scast<8>(r)) << 0);
|
break;
|
case FORMAT_G8R8UI:
|
*(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
|
break;
|
case FORMAT_G16R16:
|
*(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
|
break;
|
case FORMAT_G16R16I:
|
*(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
|
(static_cast<unsigned int>(scast<16>(r)) << 0);
|
break;
|
case FORMAT_G16R16UI:
|
*(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
|
break;
|
case FORMAT_G32R32I:
|
case FORMAT_G32R32UI:
|
((unsigned int*)element)[0] = static_cast<unsigned int>(r);
|
((unsigned int*)element)[1] = static_cast<unsigned int>(g);
|
break;
|
case FORMAT_A16B16G16R16:
|
((unsigned short*)element)[0] = unorm<16>(r);
|
((unsigned short*)element)[1] = unorm<16>(g);
|
((unsigned short*)element)[2] = unorm<16>(b);
|
((unsigned short*)element)[3] = unorm<16>(a);
|
break;
|
case FORMAT_A16B16G16R16I:
|
((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
|
((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
|
((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
|
((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
|
break;
|
case FORMAT_A16B16G16R16UI:
|
((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
|
((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
|
((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
|
((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
|
break;
|
case FORMAT_X16B16G16R16I:
|
((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
|
((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
|
((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
|
break;
|
case FORMAT_X16B16G16R16UI:
|
((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
|
((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
|
((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
|
break;
|
case FORMAT_A32B32G32R32I:
|
case FORMAT_A32B32G32R32UI:
|
((unsigned int*)element)[0] = static_cast<unsigned int>(r);
|
((unsigned int*)element)[1] = static_cast<unsigned int>(g);
|
((unsigned int*)element)[2] = static_cast<unsigned int>(b);
|
((unsigned int*)element)[3] = static_cast<unsigned int>(a);
|
break;
|
case FORMAT_X32B32G32R32I:
|
case FORMAT_X32B32G32R32UI:
|
((unsigned int*)element)[0] = static_cast<unsigned int>(r);
|
((unsigned int*)element)[1] = static_cast<unsigned int>(g);
|
((unsigned int*)element)[2] = static_cast<unsigned int>(b);
|
break;
|
case FORMAT_V8U8:
|
*(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
|
break;
|
case FORMAT_L6V5U5:
|
*(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
|
break;
|
case FORMAT_Q8W8V8U8:
|
*(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
|
break;
|
case FORMAT_X8L8V8U8:
|
*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
|
break;
|
case FORMAT_V16U16:
|
*(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
|
break;
|
case FORMAT_A2W10V10U10:
|
*(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
|
break;
|
case FORMAT_A16W16V16U16:
|
((unsigned short*)element)[0] = snorm<16>(r);
|
((unsigned short*)element)[1] = snorm<16>(g);
|
((unsigned short*)element)[2] = snorm<16>(b);
|
((unsigned short*)element)[3] = unorm<16>(a);
|
break;
|
case FORMAT_Q16W16V16U16:
|
((unsigned short*)element)[0] = snorm<16>(r);
|
((unsigned short*)element)[1] = snorm<16>(g);
|
((unsigned short*)element)[2] = snorm<16>(b);
|
((unsigned short*)element)[3] = snorm<16>(a);
|
break;
|
case FORMAT_R8G8B8:
|
((unsigned char*)element)[0] = unorm<8>(b);
|
((unsigned char*)element)[1] = unorm<8>(g);
|
((unsigned char*)element)[2] = unorm<8>(r);
|
break;
|
case FORMAT_B8G8R8:
|
((unsigned char*)element)[0] = unorm<8>(r);
|
((unsigned char*)element)[1] = unorm<8>(g);
|
((unsigned char*)element)[2] = unorm<8>(b);
|
break;
|
case FORMAT_R16F:
|
*(half*)element = (half)r;
|
break;
|
case FORMAT_A16F:
|
*(half*)element = (half)a;
|
break;
|
case FORMAT_G16R16F:
|
((half*)element)[0] = (half)r;
|
((half*)element)[1] = (half)g;
|
break;
|
case FORMAT_X16B16G16R16F_UNSIGNED:
|
r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
|
// Fall through to FORMAT_X16B16G16R16F.
|
case FORMAT_X16B16G16R16F:
|
((half*)element)[3] = 1.0f;
|
// Fall through to FORMAT_B16G16R16F.
|
case FORMAT_B16G16R16F:
|
((half*)element)[0] = (half)r;
|
((half*)element)[1] = (half)g;
|
((half*)element)[2] = (half)b;
|
break;
|
case FORMAT_A16B16G16R16F:
|
((half*)element)[0] = (half)r;
|
((half*)element)[1] = (half)g;
|
((half*)element)[2] = (half)b;
|
((half*)element)[3] = (half)a;
|
break;
|
case FORMAT_A32F:
|
*(float*)element = a;
|
break;
|
case FORMAT_R32F:
|
*(float*)element = r;
|
break;
|
case FORMAT_G32R32F:
|
((float*)element)[0] = r;
|
((float*)element)[1] = g;
|
break;
|
case FORMAT_X32B32G32R32F_UNSIGNED:
|
r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
|
// Fall through to FORMAT_X32B32G32R32F.
|
case FORMAT_X32B32G32R32F:
|
((float*)element)[3] = 1.0f;
|
// Fall through to FORMAT_B32G32R32F.
|
case FORMAT_B32G32R32F:
|
((float*)element)[0] = r;
|
((float*)element)[1] = g;
|
((float*)element)[2] = b;
|
break;
|
case FORMAT_A32B32G32R32F:
|
((float*)element)[0] = r;
|
((float*)element)[1] = g;
|
((float*)element)[2] = b;
|
((float*)element)[3] = a;
|
break;
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
*((float*)element) = r;
|
break;
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
*((float*)element) = 1 - r;
|
break;
|
case FORMAT_S8:
|
*((unsigned char*)element) = unorm<8>(r);
|
break;
|
case FORMAT_L8:
|
*(unsigned char*)element = unorm<8>(r);
|
break;
|
case FORMAT_A4L4:
|
*(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
|
break;
|
case FORMAT_L16:
|
*(unsigned short*)element = unorm<16>(r);
|
break;
|
case FORMAT_A8L8:
|
*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
|
break;
|
case FORMAT_L16F:
|
*(half*)element = (half)r;
|
break;
|
case FORMAT_A16L16F:
|
((half*)element)[0] = (half)r;
|
((half*)element)[1] = (half)a;
|
break;
|
case FORMAT_L32F:
|
*(float*)element = r;
|
break;
|
case FORMAT_A32L32F:
|
((float*)element)[0] = r;
|
((float*)element)[1] = a;
|
break;
|
default:
|
ASSERT(false);
|
}
|
}
|
|
Color<float> Surface::Buffer::read(int x, int y, int z) const
|
{
|
ASSERT((x >= -border) && (x < (width + border)));
|
ASSERT((y >= -border) && (y < (height + border)));
|
ASSERT((z >= 0) && (z < depth));
|
|
void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
|
|
return read(element);
|
}
|
|
Color<float> Surface::Buffer::read(int x, int y) const
|
{
|
ASSERT((x >= -border) && (x < (width + border)));
|
ASSERT((y >= -border) && (y < (height + border)));
|
|
void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
|
|
return read(element);
|
}
|
|
inline Color<float> Surface::Buffer::read(void *element) const
|
{
|
float r = 0.0f;
|
float g = 0.0f;
|
float b = 0.0f;
|
float a = 1.0f;
|
|
switch(format)
|
{
|
case FORMAT_P8:
|
{
|
ASSERT(palette);
|
|
unsigned int abgr = palette[*(unsigned char*)element];
|
|
r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
|
g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
|
b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
|
a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
|
}
|
break;
|
case FORMAT_A8P8:
|
{
|
ASSERT(palette);
|
|
unsigned int bgr = palette[((unsigned char*)element)[0]];
|
|
r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
|
g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
|
b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
|
a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
|
}
|
break;
|
case FORMAT_A8:
|
r = 0;
|
g = 0;
|
b = 0;
|
a = *(unsigned char*)element * (1.0f / 0xFF);
|
break;
|
case FORMAT_R8_SNORM:
|
r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
|
break;
|
case FORMAT_R8:
|
r = *(unsigned char*)element * (1.0f / 0xFF);
|
break;
|
case FORMAT_R8I:
|
r = *(signed char*)element;
|
break;
|
case FORMAT_R8UI:
|
r = *(unsigned char*)element;
|
break;
|
case FORMAT_R3G3B2:
|
{
|
unsigned char rgb = *(unsigned char*)element;
|
|
r = (rgb & 0xE0) * (1.0f / 0xE0);
|
g = (rgb & 0x1C) * (1.0f / 0x1C);
|
b = (rgb & 0x03) * (1.0f / 0x03);
|
}
|
break;
|
case FORMAT_A8R3G3B2:
|
{
|
unsigned short argb = *(unsigned short*)element;
|
|
a = (argb & 0xFF00) * (1.0f / 0xFF00);
|
r = (argb & 0x00E0) * (1.0f / 0x00E0);
|
g = (argb & 0x001C) * (1.0f / 0x001C);
|
b = (argb & 0x0003) * (1.0f / 0x0003);
|
}
|
break;
|
case FORMAT_X4R4G4B4:
|
{
|
unsigned short rgb = *(unsigned short*)element;
|
|
r = (rgb & 0x0F00) * (1.0f / 0x0F00);
|
g = (rgb & 0x00F0) * (1.0f / 0x00F0);
|
b = (rgb & 0x000F) * (1.0f / 0x000F);
|
}
|
break;
|
case FORMAT_A4R4G4B4:
|
{
|
unsigned short argb = *(unsigned short*)element;
|
|
a = (argb & 0xF000) * (1.0f / 0xF000);
|
r = (argb & 0x0F00) * (1.0f / 0x0F00);
|
g = (argb & 0x00F0) * (1.0f / 0x00F0);
|
b = (argb & 0x000F) * (1.0f / 0x000F);
|
}
|
break;
|
case FORMAT_R4G4B4A4:
|
{
|
unsigned short rgba = *(unsigned short*)element;
|
|
r = (rgba & 0xF000) * (1.0f / 0xF000);
|
g = (rgba & 0x0F00) * (1.0f / 0x0F00);
|
b = (rgba & 0x00F0) * (1.0f / 0x00F0);
|
a = (rgba & 0x000F) * (1.0f / 0x000F);
|
}
|
break;
|
case FORMAT_R5G6B5:
|
{
|
unsigned short rgb = *(unsigned short*)element;
|
|
r = (rgb & 0xF800) * (1.0f / 0xF800);
|
g = (rgb & 0x07E0) * (1.0f / 0x07E0);
|
b = (rgb & 0x001F) * (1.0f / 0x001F);
|
}
|
break;
|
case FORMAT_A1R5G5B5:
|
{
|
unsigned short argb = *(unsigned short*)element;
|
|
a = (argb & 0x8000) * (1.0f / 0x8000);
|
r = (argb & 0x7C00) * (1.0f / 0x7C00);
|
g = (argb & 0x03E0) * (1.0f / 0x03E0);
|
b = (argb & 0x001F) * (1.0f / 0x001F);
|
}
|
break;
|
case FORMAT_R5G5B5A1:
|
{
|
unsigned short rgba = *(unsigned short*)element;
|
|
r = (rgba & 0xF800) * (1.0f / 0xF800);
|
g = (rgba & 0x07C0) * (1.0f / 0x07C0);
|
b = (rgba & 0x003E) * (1.0f / 0x003E);
|
a = (rgba & 0x0001) * (1.0f / 0x0001);
|
}
|
break;
|
case FORMAT_X1R5G5B5:
|
{
|
unsigned short xrgb = *(unsigned short*)element;
|
|
r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
|
g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
|
b = (xrgb & 0x001F) * (1.0f / 0x001F);
|
}
|
break;
|
case FORMAT_A8R8G8B8:
|
{
|
unsigned int argb = *(unsigned int*)element;
|
|
a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
|
r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
|
g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
|
b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
|
}
|
break;
|
case FORMAT_X8R8G8B8:
|
{
|
unsigned int xrgb = *(unsigned int*)element;
|
|
r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
|
g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
|
b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
|
}
|
break;
|
case FORMAT_A8B8G8R8_SNORM:
|
{
|
signed char* abgr = (signed char*)element;
|
|
r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
|
g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
|
b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
|
a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
|
}
|
break;
|
case FORMAT_A8B8G8R8:
|
case FORMAT_SRGB8_A8:
|
{
|
unsigned int abgr = *(unsigned int*)element;
|
|
a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
|
b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
|
g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
|
r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
|
}
|
break;
|
case FORMAT_A8B8G8R8I:
|
{
|
signed char* abgr = (signed char*)element;
|
|
r = abgr[0];
|
g = abgr[1];
|
b = abgr[2];
|
a = abgr[3];
|
}
|
break;
|
case FORMAT_A8B8G8R8UI:
|
{
|
unsigned char* abgr = (unsigned char*)element;
|
|
r = abgr[0];
|
g = abgr[1];
|
b = abgr[2];
|
a = abgr[3];
|
}
|
break;
|
case FORMAT_X8B8G8R8_SNORM:
|
{
|
signed char* bgr = (signed char*)element;
|
|
r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
|
g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
|
b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
|
}
|
break;
|
case FORMAT_X8B8G8R8:
|
case FORMAT_SRGB8_X8:
|
{
|
unsigned int xbgr = *(unsigned int*)element;
|
|
b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
|
g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
|
r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
|
}
|
break;
|
case FORMAT_X8B8G8R8I:
|
{
|
signed char* bgr = (signed char*)element;
|
|
r = bgr[0];
|
g = bgr[1];
|
b = bgr[2];
|
}
|
break;
|
case FORMAT_X8B8G8R8UI:
|
{
|
unsigned char* bgr = (unsigned char*)element;
|
|
r = bgr[0];
|
g = bgr[1];
|
b = bgr[2];
|
}
|
break;
|
case FORMAT_G8R8_SNORM:
|
{
|
signed char* gr = (signed char*)element;
|
|
r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
|
g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
|
}
|
break;
|
case FORMAT_G8R8:
|
{
|
unsigned short gr = *(unsigned short*)element;
|
|
g = (gr & 0xFF00) * (1.0f / 0xFF00);
|
r = (gr & 0x00FF) * (1.0f / 0x00FF);
|
}
|
break;
|
case FORMAT_G8R8I:
|
{
|
signed char* gr = (signed char*)element;
|
|
r = gr[0];
|
g = gr[1];
|
}
|
break;
|
case FORMAT_G8R8UI:
|
{
|
unsigned char* gr = (unsigned char*)element;
|
|
r = gr[0];
|
g = gr[1];
|
}
|
break;
|
case FORMAT_R16I:
|
r = *((short*)element);
|
break;
|
case FORMAT_R16UI:
|
r = *((unsigned short*)element);
|
break;
|
case FORMAT_G16R16I:
|
{
|
short* gr = (short*)element;
|
|
r = gr[0];
|
g = gr[1];
|
}
|
break;
|
case FORMAT_G16R16:
|
{
|
unsigned int gr = *(unsigned int*)element;
|
|
g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
|
r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
|
}
|
break;
|
case FORMAT_G16R16UI:
|
{
|
unsigned short* gr = (unsigned short*)element;
|
|
r = gr[0];
|
g = gr[1];
|
}
|
break;
|
case FORMAT_A2R10G10B10:
|
{
|
unsigned int argb = *(unsigned int*)element;
|
|
a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
|
r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
|
g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
|
b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
|
}
|
break;
|
case FORMAT_A2B10G10R10:
|
{
|
unsigned int abgr = *(unsigned int*)element;
|
|
a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
|
b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
|
g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
|
r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
|
}
|
break;
|
case FORMAT_A2B10G10R10UI:
|
{
|
unsigned int abgr = *(unsigned int*)element;
|
|
a = static_cast<float>((abgr & 0xC0000000) >> 30);
|
b = static_cast<float>((abgr & 0x3FF00000) >> 20);
|
g = static_cast<float>((abgr & 0x000FFC00) >> 10);
|
r = static_cast<float>(abgr & 0x000003FF);
|
}
|
break;
|
case FORMAT_A16B16G16R16I:
|
{
|
short* abgr = (short*)element;
|
|
r = abgr[0];
|
g = abgr[1];
|
b = abgr[2];
|
a = abgr[3];
|
}
|
break;
|
case FORMAT_A16B16G16R16:
|
r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
|
g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
|
b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
|
a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
|
break;
|
case FORMAT_A16B16G16R16UI:
|
{
|
unsigned short* abgr = (unsigned short*)element;
|
|
r = abgr[0];
|
g = abgr[1];
|
b = abgr[2];
|
a = abgr[3];
|
}
|
break;
|
case FORMAT_X16B16G16R16I:
|
{
|
short* bgr = (short*)element;
|
|
r = bgr[0];
|
g = bgr[1];
|
b = bgr[2];
|
}
|
break;
|
case FORMAT_X16B16G16R16UI:
|
{
|
unsigned short* bgr = (unsigned short*)element;
|
|
r = bgr[0];
|
g = bgr[1];
|
b = bgr[2];
|
}
|
break;
|
case FORMAT_A32B32G32R32I:
|
{
|
int* abgr = (int*)element;
|
|
r = static_cast<float>(abgr[0]);
|
g = static_cast<float>(abgr[1]);
|
b = static_cast<float>(abgr[2]);
|
a = static_cast<float>(abgr[3]);
|
}
|
break;
|
case FORMAT_A32B32G32R32UI:
|
{
|
unsigned int* abgr = (unsigned int*)element;
|
|
r = static_cast<float>(abgr[0]);
|
g = static_cast<float>(abgr[1]);
|
b = static_cast<float>(abgr[2]);
|
a = static_cast<float>(abgr[3]);
|
}
|
break;
|
case FORMAT_X32B32G32R32I:
|
{
|
int* bgr = (int*)element;
|
|
r = static_cast<float>(bgr[0]);
|
g = static_cast<float>(bgr[1]);
|
b = static_cast<float>(bgr[2]);
|
}
|
break;
|
case FORMAT_X32B32G32R32UI:
|
{
|
unsigned int* bgr = (unsigned int*)element;
|
|
r = static_cast<float>(bgr[0]);
|
g = static_cast<float>(bgr[1]);
|
b = static_cast<float>(bgr[2]);
|
}
|
break;
|
case FORMAT_G32R32I:
|
{
|
int* gr = (int*)element;
|
|
r = static_cast<float>(gr[0]);
|
g = static_cast<float>(gr[1]);
|
}
|
break;
|
case FORMAT_G32R32UI:
|
{
|
unsigned int* gr = (unsigned int*)element;
|
|
r = static_cast<float>(gr[0]);
|
g = static_cast<float>(gr[1]);
|
}
|
break;
|
case FORMAT_R32I:
|
r = static_cast<float>(*((int*)element));
|
break;
|
case FORMAT_R32UI:
|
r = static_cast<float>(*((unsigned int*)element));
|
break;
|
case FORMAT_V8U8:
|
{
|
unsigned short vu = *(unsigned short*)element;
|
|
r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
|
g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
|
}
|
break;
|
case FORMAT_L6V5U5:
|
{
|
unsigned short lvu = *(unsigned short*)element;
|
|
r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
|
g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
|
b = (lvu & 0xFC00) * (1.0f / 0xFC00);
|
}
|
break;
|
case FORMAT_Q8W8V8U8:
|
{
|
unsigned int qwvu = *(unsigned int*)element;
|
|
r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
|
g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
|
b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
|
a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
|
}
|
break;
|
case FORMAT_X8L8V8U8:
|
{
|
unsigned int xlvu = *(unsigned int*)element;
|
|
r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
|
g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
|
b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
|
}
|
break;
|
case FORMAT_R8G8B8:
|
r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
|
g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
|
b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
|
break;
|
case FORMAT_B8G8R8:
|
r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
|
g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
|
b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
|
break;
|
case FORMAT_V16U16:
|
{
|
unsigned int vu = *(unsigned int*)element;
|
|
r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
|
g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
|
}
|
break;
|
case FORMAT_A2W10V10U10:
|
{
|
unsigned int awvu = *(unsigned int*)element;
|
|
r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
|
g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
|
b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
|
a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
|
}
|
break;
|
case FORMAT_A16W16V16U16:
|
r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
|
g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
|
b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
|
a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
|
break;
|
case FORMAT_Q16W16V16U16:
|
r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
|
g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
|
b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
|
a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
|
break;
|
case FORMAT_L8:
|
r =
|
g =
|
b = *(unsigned char*)element * (1.0f / 0xFF);
|
break;
|
case FORMAT_A4L4:
|
{
|
unsigned char al = *(unsigned char*)element;
|
|
r =
|
g =
|
b = (al & 0x0F) * (1.0f / 0x0F);
|
a = (al & 0xF0) * (1.0f / 0xF0);
|
}
|
break;
|
case FORMAT_L16:
|
r =
|
g =
|
b = *(unsigned short*)element * (1.0f / 0xFFFF);
|
break;
|
case FORMAT_A8L8:
|
r =
|
g =
|
b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
|
a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
|
break;
|
case FORMAT_L16F:
|
r =
|
g =
|
b = *(half*)element;
|
break;
|
case FORMAT_A16L16F:
|
r =
|
g =
|
b = ((half*)element)[0];
|
a = ((half*)element)[1];
|
break;
|
case FORMAT_L32F:
|
r =
|
g =
|
b = *(float*)element;
|
break;
|
case FORMAT_A32L32F:
|
r =
|
g =
|
b = ((float*)element)[0];
|
a = ((float*)element)[1];
|
break;
|
case FORMAT_A16F:
|
a = *(half*)element;
|
break;
|
case FORMAT_R16F:
|
r = *(half*)element;
|
break;
|
case FORMAT_G16R16F:
|
r = ((half*)element)[0];
|
g = ((half*)element)[1];
|
break;
|
case FORMAT_X16B16G16R16F:
|
case FORMAT_X16B16G16R16F_UNSIGNED:
|
case FORMAT_B16G16R16F:
|
r = ((half*)element)[0];
|
g = ((half*)element)[1];
|
b = ((half*)element)[2];
|
break;
|
case FORMAT_A16B16G16R16F:
|
r = ((half*)element)[0];
|
g = ((half*)element)[1];
|
b = ((half*)element)[2];
|
a = ((half*)element)[3];
|
break;
|
case FORMAT_A32F:
|
a = *(float*)element;
|
break;
|
case FORMAT_R32F:
|
r = *(float*)element;
|
break;
|
case FORMAT_G32R32F:
|
r = ((float*)element)[0];
|
g = ((float*)element)[1];
|
break;
|
case FORMAT_X32B32G32R32F:
|
case FORMAT_X32B32G32R32F_UNSIGNED:
|
case FORMAT_B32G32R32F:
|
r = ((float*)element)[0];
|
g = ((float*)element)[1];
|
b = ((float*)element)[2];
|
break;
|
case FORMAT_A32B32G32R32F:
|
r = ((float*)element)[0];
|
g = ((float*)element)[1];
|
b = ((float*)element)[2];
|
a = ((float*)element)[3];
|
break;
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
r = *(float*)element;
|
g = r;
|
b = r;
|
a = r;
|
break;
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
r = 1.0f - *(float*)element;
|
g = r;
|
b = r;
|
a = r;
|
break;
|
case FORMAT_S8:
|
r = *(unsigned char*)element * (1.0f / 0xFF);
|
break;
|
default:
|
ASSERT(false);
|
}
|
|
if(isSRGBformat(format))
|
{
|
r = sRGBtoLinear(r);
|
g = sRGBtoLinear(g);
|
b = sRGBtoLinear(b);
|
}
|
|
return Color<float>(r, g, b, a);
|
}
|
|
Color<float> Surface::Buffer::sample(float x, float y, float z) const
|
{
|
x -= 0.5f;
|
y -= 0.5f;
|
z -= 0.5f;
|
|
int x0 = clamp((int)x, 0, width - 1);
|
int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
|
|
int y0 = clamp((int)y, 0, height - 1);
|
int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
|
|
int z0 = clamp((int)z, 0, depth - 1);
|
int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
|
|
Color<float> c000 = read(x0, y0, z0);
|
Color<float> c100 = read(x1, y0, z0);
|
Color<float> c010 = read(x0, y1, z0);
|
Color<float> c110 = read(x1, y1, z0);
|
Color<float> c001 = read(x0, y0, z1);
|
Color<float> c101 = read(x1, y0, z1);
|
Color<float> c011 = read(x0, y1, z1);
|
Color<float> c111 = read(x1, y1, z1);
|
|
float fx = x - x0;
|
float fy = y - y0;
|
float fz = z - z0;
|
|
c000 *= (1 - fx) * (1 - fy) * (1 - fz);
|
c100 *= fx * (1 - fy) * (1 - fz);
|
c010 *= (1 - fx) * fy * (1 - fz);
|
c110 *= fx * fy * (1 - fz);
|
c001 *= (1 - fx) * (1 - fy) * fz;
|
c101 *= fx * (1 - fy) * fz;
|
c011 *= (1 - fx) * fy * fz;
|
c111 *= fx * fy * fz;
|
|
return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
|
}
|
|
Color<float> Surface::Buffer::sample(float x, float y, int layer) const
|
{
|
x -= 0.5f;
|
y -= 0.5f;
|
|
int x0 = clamp((int)x, 0, width - 1);
|
int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
|
|
int y0 = clamp((int)y, 0, height - 1);
|
int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
|
|
Color<float> c00 = read(x0, y0, layer);
|
Color<float> c10 = read(x1, y0, layer);
|
Color<float> c01 = read(x0, y1, layer);
|
Color<float> c11 = read(x1, y1, layer);
|
|
float fx = x - x0;
|
float fy = y - y0;
|
|
c00 *= (1 - fx) * (1 - fy);
|
c10 *= fx * (1 - fy);
|
c01 *= (1 - fx) * fy;
|
c11 *= fx * fy;
|
|
return c00 + c10 + c01 + c11;
|
}
|
|
void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
|
{
|
this->lock = lock;
|
|
switch(lock)
|
{
|
case LOCK_UNLOCKED:
|
case LOCK_READONLY:
|
case LOCK_UPDATE:
|
break;
|
case LOCK_WRITEONLY:
|
case LOCK_READWRITE:
|
case LOCK_DISCARD:
|
dirty = true;
|
break;
|
default:
|
ASSERT(false);
|
}
|
|
if(buffer)
|
{
|
x += border;
|
y += border;
|
|
switch(format)
|
{
|
case FORMAT_DXT1:
|
case FORMAT_ATI1:
|
case FORMAT_ETC1:
|
case FORMAT_R11_EAC:
|
case FORMAT_SIGNED_R11_EAC:
|
case FORMAT_RGB8_ETC2:
|
case FORMAT_SRGB8_ETC2:
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
|
case FORMAT_RG11_EAC:
|
case FORMAT_SIGNED_RG11_EAC:
|
case FORMAT_RGBA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
case FORMAT_ATI2:
|
return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
|
default:
|
return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
|
}
|
}
|
|
return nullptr;
|
}
|
|
void Surface::Buffer::unlockRect()
|
{
|
lock = LOCK_UNLOCKED;
|
}
|
|
class SurfaceImplementation : public Surface
|
{
|
public:
|
SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
|
: Surface(width, height, depth, format, pixels, pitch, slice) {}
|
SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
|
: Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
|
~SurfaceImplementation() override {}
|
|
void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
|
{
|
return Surface::lockInternal(x, y, z, lock, client);
|
}
|
|
void unlockInternal() override
|
{
|
Surface::unlockInternal();
|
}
|
};
|
|
Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
|
{
|
return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
|
}
|
|
Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
|
{
|
return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
|
}
|
|
Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
|
{
|
resource = new Resource(0);
|
hasParent = false;
|
ownExternal = false;
|
depth = max(1, depth);
|
|
external.buffer = pixels;
|
external.width = width;
|
external.height = height;
|
external.depth = depth;
|
external.samples = 1;
|
external.format = format;
|
external.bytes = bytes(external.format);
|
external.pitchB = pitch;
|
external.pitchP = external.bytes ? pitch / external.bytes : 0;
|
external.sliceB = slice;
|
external.sliceP = external.bytes ? slice / external.bytes : 0;
|
external.border = 0;
|
external.lock = LOCK_UNLOCKED;
|
external.dirty = true;
|
|
internal.buffer = nullptr;
|
internal.width = width;
|
internal.height = height;
|
internal.depth = depth;
|
internal.samples = 1;
|
internal.format = selectInternalFormat(format);
|
internal.bytes = bytes(internal.format);
|
internal.pitchB = pitchB(internal.width, 0, internal.format, false);
|
internal.pitchP = pitchP(internal.width, 0, internal.format, false);
|
internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
|
internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
|
internal.border = 0;
|
internal.lock = LOCK_UNLOCKED;
|
internal.dirty = false;
|
|
stencil.buffer = nullptr;
|
stencil.width = width;
|
stencil.height = height;
|
stencil.depth = depth;
|
stencil.samples = 1;
|
stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
|
stencil.bytes = bytes(stencil.format);
|
stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
|
stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
|
stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
|
stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
|
stencil.border = 0;
|
stencil.lock = LOCK_UNLOCKED;
|
stencil.dirty = false;
|
|
dirtyContents = true;
|
paletteUsed = 0;
|
}
|
|
Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
|
{
|
resource = texture ? texture : new Resource(0);
|
hasParent = texture != nullptr;
|
ownExternal = true;
|
depth = max(1, depth);
|
samples = max(1, samples);
|
|
external.buffer = nullptr;
|
external.width = width;
|
external.height = height;
|
external.depth = depth;
|
external.samples = (short)samples;
|
external.format = format;
|
external.bytes = bytes(external.format);
|
external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes;
|
external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided;
|
external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
|
external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
|
external.border = 0;
|
external.lock = LOCK_UNLOCKED;
|
external.dirty = false;
|
|
internal.buffer = nullptr;
|
internal.width = width;
|
internal.height = height;
|
internal.depth = depth;
|
internal.samples = (short)samples;
|
internal.format = selectInternalFormat(format);
|
internal.bytes = bytes(internal.format);
|
internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
|
internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
|
internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
|
internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
|
internal.border = (short)border;
|
internal.lock = LOCK_UNLOCKED;
|
internal.dirty = false;
|
|
stencil.buffer = nullptr;
|
stencil.width = width;
|
stencil.height = height;
|
stencil.depth = depth;
|
stencil.samples = (short)samples;
|
stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
|
stencil.bytes = bytes(stencil.format);
|
stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
|
stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
|
stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
|
stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
|
stencil.border = 0;
|
stencil.lock = LOCK_UNLOCKED;
|
stencil.dirty = false;
|
|
dirtyContents = true;
|
paletteUsed = 0;
|
}
|
|
Surface::~Surface()
|
{
|
// sync() must be called before this destructor to ensure all locks have been released.
|
// We can't call it here because the parent resource may already have been destroyed.
|
ASSERT(isUnlocked());
|
|
if(!hasParent)
|
{
|
resource->destruct();
|
}
|
|
if(ownExternal)
|
{
|
deallocate(external.buffer);
|
}
|
|
if(internal.buffer != external.buffer)
|
{
|
deallocate(internal.buffer);
|
}
|
|
deallocate(stencil.buffer);
|
|
external.buffer = nullptr;
|
internal.buffer = nullptr;
|
stencil.buffer = nullptr;
|
}
|
|
void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
|
{
|
resource->lock(client);
|
|
if(!external.buffer)
|
{
|
if(internal.buffer && identicalBuffers())
|
{
|
external.buffer = internal.buffer;
|
}
|
else
|
{
|
external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
|
}
|
}
|
|
if(internal.dirty)
|
{
|
if(lock != LOCK_DISCARD)
|
{
|
update(external, internal);
|
}
|
|
internal.dirty = false;
|
}
|
|
switch(lock)
|
{
|
case LOCK_READONLY:
|
break;
|
case LOCK_WRITEONLY:
|
case LOCK_READWRITE:
|
case LOCK_DISCARD:
|
dirtyContents = true;
|
break;
|
default:
|
ASSERT(false);
|
}
|
|
return external.lockRect(x, y, z, lock);
|
}
|
|
void Surface::unlockExternal()
|
{
|
external.unlockRect();
|
|
resource->unlock();
|
}
|
|
void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
|
{
|
if(lock != LOCK_UNLOCKED)
|
{
|
resource->lock(client);
|
}
|
|
if(!internal.buffer)
|
{
|
if(external.buffer && identicalBuffers())
|
{
|
internal.buffer = external.buffer;
|
}
|
else
|
{
|
internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
|
}
|
}
|
|
// FIXME: WHQL requires conversion to lower external precision and back
|
if(logPrecision >= WHQL)
|
{
|
if(internal.dirty && renderTarget && internal.format != external.format)
|
{
|
if(lock != LOCK_DISCARD)
|
{
|
switch(external.format)
|
{
|
case FORMAT_R3G3B2:
|
case FORMAT_A8R3G3B2:
|
case FORMAT_A1R5G5B5:
|
case FORMAT_A2R10G10B10:
|
case FORMAT_A2B10G10R10:
|
lockExternal(0, 0, 0, LOCK_READWRITE, client);
|
unlockExternal();
|
break;
|
default:
|
// Difference passes WHQL
|
break;
|
}
|
}
|
}
|
}
|
|
if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
|
{
|
if(lock != LOCK_DISCARD)
|
{
|
update(internal, external);
|
}
|
|
external.dirty = false;
|
paletteUsed = Surface::paletteID;
|
}
|
|
switch(lock)
|
{
|
case LOCK_UNLOCKED:
|
case LOCK_READONLY:
|
break;
|
case LOCK_WRITEONLY:
|
case LOCK_READWRITE:
|
case LOCK_DISCARD:
|
dirtyContents = true;
|
break;
|
default:
|
ASSERT(false);
|
}
|
|
if(lock == LOCK_READONLY && client == PUBLIC)
|
{
|
resolve();
|
}
|
|
return internal.lockRect(x, y, z, lock);
|
}
|
|
void Surface::unlockInternal()
|
{
|
internal.unlockRect();
|
|
resource->unlock();
|
}
|
|
void *Surface::lockStencil(int x, int y, int front, Accessor client)
|
{
|
resource->lock(client);
|
|
if(stencil.format == FORMAT_NULL)
|
{
|
return nullptr;
|
}
|
|
if(!stencil.buffer)
|
{
|
stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
|
}
|
|
return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
|
}
|
|
void Surface::unlockStencil()
|
{
|
stencil.unlockRect();
|
|
resource->unlock();
|
}
|
|
int Surface::bytes(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_NULL: return 0;
|
case FORMAT_P8: return 1;
|
case FORMAT_A8P8: return 2;
|
case FORMAT_A8: return 1;
|
case FORMAT_R8I: return 1;
|
case FORMAT_R8: return 1;
|
case FORMAT_R3G3B2: return 1;
|
case FORMAT_R16I: return 2;
|
case FORMAT_R16UI: return 2;
|
case FORMAT_A8R3G3B2: return 2;
|
case FORMAT_R5G6B5: return 2;
|
case FORMAT_A1R5G5B5: return 2;
|
case FORMAT_X1R5G5B5: return 2;
|
case FORMAT_R5G5B5A1: return 2;
|
case FORMAT_X4R4G4B4: return 2;
|
case FORMAT_A4R4G4B4: return 2;
|
case FORMAT_R4G4B4A4: return 2;
|
case FORMAT_R8G8B8: return 3;
|
case FORMAT_B8G8R8: return 3;
|
case FORMAT_R32I: return 4;
|
case FORMAT_R32UI: return 4;
|
case FORMAT_X8R8G8B8: return 4;
|
// case FORMAT_X8G8R8B8Q: return 4;
|
case FORMAT_A8R8G8B8: return 4;
|
// case FORMAT_A8G8R8B8Q: return 4;
|
case FORMAT_X8B8G8R8I: return 4;
|
case FORMAT_X8B8G8R8: return 4;
|
case FORMAT_SRGB8_X8: return 4;
|
case FORMAT_SRGB8_A8: return 4;
|
case FORMAT_A8B8G8R8I: return 4;
|
case FORMAT_R8UI: return 1;
|
case FORMAT_G8R8UI: return 2;
|
case FORMAT_X8B8G8R8UI: return 4;
|
case FORMAT_A8B8G8R8UI: return 4;
|
case FORMAT_A8B8G8R8: return 4;
|
case FORMAT_R8_SNORM: return 1;
|
case FORMAT_G8R8_SNORM: return 2;
|
case FORMAT_X8B8G8R8_SNORM: return 4;
|
case FORMAT_A8B8G8R8_SNORM: return 4;
|
case FORMAT_A2R10G10B10: return 4;
|
case FORMAT_A2B10G10R10: return 4;
|
case FORMAT_A2B10G10R10UI: return 4;
|
case FORMAT_G8R8I: return 2;
|
case FORMAT_G8R8: return 2;
|
case FORMAT_G16R16I: return 4;
|
case FORMAT_G16R16UI: return 4;
|
case FORMAT_G16R16: return 4;
|
case FORMAT_G32R32I: return 8;
|
case FORMAT_G32R32UI: return 8;
|
case FORMAT_X16B16G16R16I: return 8;
|
case FORMAT_X16B16G16R16UI: return 8;
|
case FORMAT_A16B16G16R16I: return 8;
|
case FORMAT_A16B16G16R16UI: return 8;
|
case FORMAT_A16B16G16R16: return 8;
|
case FORMAT_X32B32G32R32I: return 16;
|
case FORMAT_X32B32G32R32UI: return 16;
|
case FORMAT_A32B32G32R32I: return 16;
|
case FORMAT_A32B32G32R32UI: return 16;
|
// Compressed formats
|
case FORMAT_DXT1: return 2; // Column of four pixels
|
case FORMAT_DXT3: return 4; // Column of four pixels
|
case FORMAT_DXT5: return 4; // Column of four pixels
|
case FORMAT_ATI1: return 2; // Column of four pixels
|
case FORMAT_ATI2: return 4; // Column of four pixels
|
case FORMAT_ETC1: return 2; // Column of four pixels
|
case FORMAT_R11_EAC: return 2;
|
case FORMAT_SIGNED_R11_EAC: return 2;
|
case FORMAT_RG11_EAC: return 4;
|
case FORMAT_SIGNED_RG11_EAC: return 4;
|
case FORMAT_RGB8_ETC2: return 2;
|
case FORMAT_SRGB8_ETC2: return 2;
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
|
case FORMAT_RGBA8_ETC2_EAC: return 4;
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
|
// Bumpmap formats
|
case FORMAT_V8U8: return 2;
|
case FORMAT_L6V5U5: return 2;
|
case FORMAT_Q8W8V8U8: return 4;
|
case FORMAT_X8L8V8U8: return 4;
|
case FORMAT_A2W10V10U10: return 4;
|
case FORMAT_V16U16: return 4;
|
case FORMAT_A16W16V16U16: return 8;
|
case FORMAT_Q16W16V16U16: return 8;
|
// Luminance formats
|
case FORMAT_L8: return 1;
|
case FORMAT_A4L4: return 1;
|
case FORMAT_L16: return 2;
|
case FORMAT_A8L8: return 2;
|
case FORMAT_L16F: return 2;
|
case FORMAT_A16L16F: return 4;
|
case FORMAT_L32F: return 4;
|
case FORMAT_A32L32F: return 8;
|
// Floating-point formats
|
case FORMAT_A16F: return 2;
|
case FORMAT_R16F: return 2;
|
case FORMAT_G16R16F: return 4;
|
case FORMAT_B16G16R16F: return 6;
|
case FORMAT_X16B16G16R16F: return 8;
|
case FORMAT_A16B16G16R16F: return 8;
|
case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
|
case FORMAT_A32F: return 4;
|
case FORMAT_R32F: return 4;
|
case FORMAT_G32R32F: return 8;
|
case FORMAT_B32G32R32F: return 12;
|
case FORMAT_X32B32G32R32F: return 16;
|
case FORMAT_A32B32G32R32F: return 16;
|
case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
|
// Depth/stencil formats
|
case FORMAT_D16: return 2;
|
case FORMAT_D32: return 4;
|
case FORMAT_D24X8: return 4;
|
case FORMAT_D24S8: return 4;
|
case FORMAT_D24FS8: return 4;
|
case FORMAT_D32F: return 4;
|
case FORMAT_D32FS8: return 4;
|
case FORMAT_D32F_COMPLEMENTARY: return 4;
|
case FORMAT_D32FS8_COMPLEMENTARY: return 4;
|
case FORMAT_D32F_LOCKABLE: return 4;
|
case FORMAT_D32FS8_TEXTURE: return 4;
|
case FORMAT_D32F_SHADOW: return 4;
|
case FORMAT_D32FS8_SHADOW: return 4;
|
case FORMAT_DF24S8: return 4;
|
case FORMAT_DF16S8: return 2;
|
case FORMAT_INTZ: return 4;
|
case FORMAT_S8: return 1;
|
case FORMAT_YV12_BT601: return 1; // Y plane only
|
case FORMAT_YV12_BT709: return 1; // Y plane only
|
case FORMAT_YV12_JFIF: return 1; // Y plane only
|
default:
|
ASSERT(false);
|
}
|
|
return 0;
|
}
|
|
int Surface::pitchB(int width, int border, Format format, bool target)
|
{
|
width += 2 * border;
|
|
// Render targets require 2x2 quads
|
if(target || isDepth(format) || isStencil(format))
|
{
|
width = align<2>(width);
|
}
|
|
switch(format)
|
{
|
case FORMAT_DXT1:
|
case FORMAT_ETC1:
|
case FORMAT_R11_EAC:
|
case FORMAT_SIGNED_R11_EAC:
|
case FORMAT_RGB8_ETC2:
|
case FORMAT_SRGB8_ETC2:
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
|
case FORMAT_RG11_EAC:
|
case FORMAT_SIGNED_RG11_EAC:
|
case FORMAT_RGBA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
return 16 * ((width + 4) / 5);
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
return 16 * ((width + 5) / 6);
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
return 16 * ((width + 7) / 8);
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
return 16 * ((width + 9) / 10);
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
return 16 * ((width + 11) / 12);
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
|
case FORMAT_ATI1:
|
return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
|
case FORMAT_ATI2:
|
return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
|
case FORMAT_YV12_BT601:
|
case FORMAT_YV12_BT709:
|
case FORMAT_YV12_JFIF:
|
return align<16>(width);
|
default:
|
return bytes(format) * width;
|
}
|
}
|
|
int Surface::pitchP(int width, int border, Format format, bool target)
|
{
|
int B = bytes(format);
|
|
return B > 0 ? pitchB(width, border, format, target) / B : 0;
|
}
|
|
int Surface::sliceB(int width, int height, int border, Format format, bool target)
|
{
|
height += 2 * border;
|
|
// Render targets require 2x2 quads
|
if(target || isDepth(format) || isStencil(format))
|
{
|
height = align<2>(height);
|
}
|
|
switch(format)
|
{
|
case FORMAT_DXT1:
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
case FORMAT_ETC1:
|
case FORMAT_R11_EAC:
|
case FORMAT_SIGNED_R11_EAC:
|
case FORMAT_RG11_EAC:
|
case FORMAT_SIGNED_RG11_EAC:
|
case FORMAT_RGB8_ETC2:
|
case FORMAT_SRGB8_ETC2:
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_RGBA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
return pitchB(width, border, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
return pitchB(width, border, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
return pitchB(width, border, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
return pitchB(width, border, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
|
case FORMAT_ATI1:
|
case FORMAT_ATI2:
|
return pitchB(width, border, format, target) * align<4>(height); // Pitch computed per row
|
default:
|
return pitchB(width, border, format, target) * height; // Pitch computed per row
|
}
|
}
|
|
int Surface::sliceP(int width, int height, int border, Format format, bool target)
|
{
|
int B = bytes(format);
|
|
return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
|
}
|
|
void Surface::update(Buffer &destination, Buffer &source)
|
{
|
// ASSERT(source.lock != LOCK_UNLOCKED);
|
// ASSERT(destination.lock != LOCK_UNLOCKED);
|
|
if(destination.buffer != source.buffer)
|
{
|
ASSERT(source.dirty && !destination.dirty);
|
|
switch(source.format)
|
{
|
case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
|
case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
|
case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
|
case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
|
case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
|
case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
|
case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
|
case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
|
case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
|
case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
|
case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
|
case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
|
case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
|
case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
|
case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
|
case FORMAT_ETC1:
|
case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
|
case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
|
case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
|
default: genericUpdate(destination, source); break;
|
}
|
}
|
}
|
|
void Surface::genericUpdate(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
int rowBytes = width * source.bytes;
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
if(source.format == destination.format)
|
{
|
memcpy(destinationRow, sourceRow, rowBytes);
|
}
|
else
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
Color<float> color = source.read(sourceElement);
|
destination.write(destinationElement, color);
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int b = sourceElement[0];
|
unsigned int g = sourceElement[1];
|
unsigned int r = sourceElement[2];
|
|
*(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int xrgb = *(unsigned short*)sourceElement;
|
|
unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
|
unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
|
unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
|
|
*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int argb = *(unsigned short*)sourceElement;
|
|
unsigned int a = (argb & 0x8000) * 130560;
|
unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
|
unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
|
unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
|
|
*(unsigned int*)destinationElement = a | r | g | b;
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int xrgb = *(unsigned short*)sourceElement;
|
|
unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
|
unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
|
unsigned int b = (xrgb & 0x000F) * 0x00000011;
|
|
*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int argb = *(unsigned short*)sourceElement;
|
|
unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
|
unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
|
unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
|
unsigned int b = (argb & 0x000F) * 0x00000011;
|
|
*(unsigned int*)destinationElement = a | r | g | b;
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeP8(Buffer &destination, Buffer &source)
|
{
|
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
|
unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
|
|
int depth = min(destination.depth, source.depth);
|
int height = min(destination.height, source.height);
|
int width = min(destination.width, source.width);
|
|
for(int z = 0; z < depth; z++)
|
{
|
unsigned char *sourceRow = sourceSlice;
|
unsigned char *destinationRow = destinationSlice;
|
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *sourceElement = sourceRow;
|
unsigned char *destinationElement = destinationRow;
|
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int abgr = palette[*(unsigned char*)sourceElement];
|
|
unsigned int r = (abgr & 0x000000FF) << 16;
|
unsigned int g = (abgr & 0x0000FF00) << 0;
|
unsigned int b = (abgr & 0x00FF0000) >> 16;
|
unsigned int a = (abgr & 0xFF000000) >> 0;
|
|
*(unsigned int*)destinationElement = a | r | g | b;
|
|
sourceElement += source.bytes;
|
destinationElement += destination.bytes;
|
}
|
|
sourceRow += source.pitchB;
|
destinationRow += destination.pitchB;
|
}
|
|
sourceSlice += source.sliceB;
|
destinationSlice += destination.sliceB;
|
}
|
|
source.unlockRect();
|
destination.unlockRect();
|
}
|
|
void Surface::decodeDXT1(Buffer &internal, Buffer &external)
|
{
|
unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
|
const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
|
|
for(int z = 0; z < external.depth; z++)
|
{
|
unsigned int *dest = destSlice;
|
|
for(int y = 0; y < external.height; y += 4)
|
{
|
for(int x = 0; x < external.width; x += 4)
|
{
|
Color<byte> c[4];
|
|
c[0] = source->c0;
|
c[1] = source->c1;
|
|
if(source->c0 > source->c1) // No transparency
|
{
|
// c2 = 2 / 3 * c0 + 1 / 3 * c1
|
c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
|
c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
|
c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
|
c[2].a = 0xFF;
|
|
// c3 = 1 / 3 * c0 + 2 / 3 * c1
|
c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
|
c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
|
c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
|
c[3].a = 0xFF;
|
}
|
else // c3 transparent
|
{
|
// c2 = 1 / 2 * c0 + 1 / 2 * c1
|
c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
|
c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
|
c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
|
c[2].a = 0xFF;
|
|
c[3].r = 0;
|
c[3].g = 0;
|
c[3].b = 0;
|
c[3].a = 0;
|
}
|
|
for(int j = 0; j < 4 && (y + j) < internal.height; j++)
|
{
|
for(int i = 0; i < 4 && (x + i) < internal.width; i++)
|
{
|
dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
|
}
|
}
|
|
source++;
|
}
|
}
|
|
(byte*&)destSlice += internal.sliceB;
|
}
|
|
external.unlockRect();
|
internal.unlockRect();
|
}
|
|
void Surface::decodeDXT3(Buffer &internal, Buffer &external)
|
{
|
unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
|
const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
|
|
for(int z = 0; z < external.depth; z++)
|
{
|
unsigned int *dest = destSlice;
|
|
for(int y = 0; y < external.height; y += 4)
|
{
|
for(int x = 0; x < external.width; x += 4)
|
{
|
Color<byte> c[4];
|
|
c[0] = source->c0;
|
c[1] = source->c1;
|
|
// c2 = 2 / 3 * c0 + 1 / 3 * c1
|
c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
|
c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
|
c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
|
|
// c3 = 1 / 3 * c0 + 2 / 3 * c1
|
c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
|
c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
|
c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
|
|
for(int j = 0; j < 4 && (y + j) < internal.height; j++)
|
{
|
for(int i = 0; i < 4 && (x + i) < internal.width; i++)
|
{
|
unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
|
unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
|
|
dest[(x + i) + (y + j) * internal.pitchP] = color;
|
}
|
}
|
|
source++;
|
}
|
}
|
|
(byte*&)destSlice += internal.sliceB;
|
}
|
|
external.unlockRect();
|
internal.unlockRect();
|
}
|
|
void Surface::decodeDXT5(Buffer &internal, Buffer &external)
|
{
|
unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
|
const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
|
|
for(int z = 0; z < external.depth; z++)
|
{
|
unsigned int *dest = destSlice;
|
|
for(int y = 0; y < external.height; y += 4)
|
{
|
for(int x = 0; x < external.width; x += 4)
|
{
|
Color<byte> c[4];
|
|
c[0] = source->c0;
|
c[1] = source->c1;
|
|
// c2 = 2 / 3 * c0 + 1 / 3 * c1
|
c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
|
c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
|
c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
|
|
// c3 = 1 / 3 * c0 + 2 / 3 * c1
|
c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
|
c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
|
c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
|
|
byte a[8];
|
|
a[0] = source->a0;
|
a[1] = source->a1;
|
|
if(a[0] > a[1])
|
{
|
a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
|
a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
|
a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
|
a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
|
a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
|
a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
|
}
|
else
|
{
|
a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
|
a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
|
a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
|
a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
|
a[6] = 0;
|
a[7] = 0xFF;
|
}
|
|
for(int j = 0; j < 4 && (y + j) < internal.height; j++)
|
{
|
for(int i = 0; i < 4 && (x + i) < internal.width; i++)
|
{
|
unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
|
unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
|
|
dest[(x + i) + (y + j) * internal.pitchP] = color;
|
}
|
}
|
|
source++;
|
}
|
}
|
|
(byte*&)destSlice += internal.sliceB;
|
}
|
|
external.unlockRect();
|
internal.unlockRect();
|
}
|
|
void Surface::decodeATI1(Buffer &internal, Buffer &external)
|
{
|
byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
|
const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
|
|
for(int z = 0; z < external.depth; z++)
|
{
|
byte *dest = destSlice;
|
|
for(int y = 0; y < external.height; y += 4)
|
{
|
for(int x = 0; x < external.width; x += 4)
|
{
|
byte r[8];
|
|
r[0] = source->r0;
|
r[1] = source->r1;
|
|
if(r[0] > r[1])
|
{
|
r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
|
r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
|
r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
|
r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
|
r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
|
r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
|
}
|
else
|
{
|
r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
|
r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
|
r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
|
r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
|
r[6] = 0;
|
r[7] = 0xFF;
|
}
|
|
for(int j = 0; j < 4 && (y + j) < internal.height; j++)
|
{
|
for(int i = 0; i < 4 && (x + i) < internal.width; i++)
|
{
|
dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
|
}
|
}
|
|
source++;
|
}
|
}
|
|
destSlice += internal.sliceB;
|
}
|
|
external.unlockRect();
|
internal.unlockRect();
|
}
|
|
void Surface::decodeATI2(Buffer &internal, Buffer &external)
|
{
|
word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
|
const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
|
|
for(int z = 0; z < external.depth; z++)
|
{
|
word *dest = destSlice;
|
|
for(int y = 0; y < external.height; y += 4)
|
{
|
for(int x = 0; x < external.width; x += 4)
|
{
|
byte X[8];
|
|
X[0] = source->x0;
|
X[1] = source->x1;
|
|
if(X[0] > X[1])
|
{
|
X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
|
X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
|
X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
|
X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
|
X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
|
X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
|
}
|
else
|
{
|
X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
|
X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
|
X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
|
X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
|
X[6] = 0;
|
X[7] = 0xFF;
|
}
|
|
byte Y[8];
|
|
Y[0] = source->y0;
|
Y[1] = source->y1;
|
|
if(Y[0] > Y[1])
|
{
|
Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
|
Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
|
Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
|
Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
|
Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
|
Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
|
}
|
else
|
{
|
Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
|
Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
|
Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
|
Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
|
Y[6] = 0;
|
Y[7] = 0xFF;
|
}
|
|
for(int j = 0; j < 4 && (y + j) < internal.height; j++)
|
{
|
for(int i = 0; i < 4 && (x + i) < internal.width; i++)
|
{
|
word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
|
word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
|
|
dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r;
|
}
|
}
|
|
source++;
|
}
|
}
|
|
(byte*&)destSlice += internal.sliceB;
|
}
|
|
external.unlockRect();
|
internal.unlockRect();
|
}
|
|
void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
|
{
|
ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
|
(nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
|
external.unlockRect();
|
internal.unlockRect();
|
|
if(isSRGB)
|
{
|
static byte sRGBtoLinearTable[256];
|
static bool sRGBtoLinearTableDirty = true;
|
if(sRGBtoLinearTableDirty)
|
{
|
for(int i = 0; i < 256; i++)
|
{
|
sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
|
}
|
sRGBtoLinearTableDirty = false;
|
}
|
|
// Perform sRGB conversion in place after decoding
|
byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
|
for(int y = 0; y < internal.height; y++)
|
{
|
byte *srcRow = src + y * internal.pitchB;
|
for(int x = 0; x < internal.width; x++)
|
{
|
byte *srcPix = srcRow + x * internal.bytes;
|
for(int i = 0; i < 3; i++)
|
{
|
srcPix[i] = sRGBtoLinearTable[srcPix[i]];
|
}
|
}
|
}
|
internal.unlockRect();
|
}
|
}
|
|
void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
|
{
|
ASSERT(nbChannels == 1 || nbChannels == 2);
|
|
byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
|
ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
|
(nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
|
external.unlockRect();
|
|
// FIXME: We convert EAC data to float, until signed short internal formats are supported
|
// This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
|
const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
|
for(int y = 0; y < internal.height; y++)
|
{
|
byte* srcRow = src + y * internal.pitchB;
|
for(int x = internal.width - 1; x >= 0; x--)
|
{
|
int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
|
float* dstPix = reinterpret_cast<float*>(srcPix);
|
for(int c = nbChannels - 1; c >= 0; c--)
|
{
|
dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
|
}
|
}
|
}
|
|
internal.unlockRect();
|
}
|
|
void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
|
{
|
}
|
|
size_t Surface::size(int width, int height, int depth, int border, int samples, Format format)
|
{
|
samples = max(1, samples);
|
|
switch(format)
|
{
|
default:
|
{
|
uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;
|
|
// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
|
// and stencil operations also read 8 bytes per four 8-bit stencil values,
|
// so we have to allocate 4 extra bytes to avoid buffer overruns.
|
size += 4;
|
|
// We can only sample buffers smaller than 2 GiB.
|
// Force an out-of-memory if larger, or let the caller report an error.
|
return size < 0x80000000u ? (size_t)size : std::numeric_limits<size_t>::max();
|
}
|
case FORMAT_YV12_BT601:
|
case FORMAT_YV12_BT709:
|
case FORMAT_YV12_JFIF:
|
{
|
width += 2 * border;
|
height += 2 * border;
|
|
size_t YStride = align<16>(width);
|
size_t YSize = YStride * height;
|
size_t CStride = align<16>(YStride / 2);
|
size_t CSize = CStride * height / 2;
|
|
return YSize + 2 * CSize;
|
}
|
}
|
}
|
|
bool Surface::isStencil(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_D32:
|
case FORMAT_D16:
|
case FORMAT_D24X8:
|
case FORMAT_D32F:
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32F_SHADOW:
|
return false;
|
case FORMAT_D24S8:
|
case FORMAT_D24FS8:
|
case FORMAT_S8:
|
case FORMAT_DF24S8:
|
case FORMAT_DF16S8:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32FS8_SHADOW:
|
case FORMAT_D32FS8:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
case FORMAT_INTZ:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isDepth(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_D32:
|
case FORMAT_D16:
|
case FORMAT_D24X8:
|
case FORMAT_D24S8:
|
case FORMAT_D24FS8:
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_DF24S8:
|
case FORMAT_DF16S8:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
case FORMAT_INTZ:
|
return true;
|
case FORMAT_S8:
|
return false;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::hasQuadLayout(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_D32:
|
case FORMAT_D16:
|
case FORMAT_D24X8:
|
case FORMAT_D24S8:
|
case FORMAT_D24FS8:
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
case FORMAT_DF24S8:
|
case FORMAT_DF16S8:
|
case FORMAT_INTZ:
|
case FORMAT_S8:
|
case FORMAT_A8G8R8B8Q:
|
case FORMAT_X8G8R8B8Q:
|
return true;
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
default:
|
break;
|
}
|
|
return false;
|
}
|
|
bool Surface::isPalette(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_P8:
|
case FORMAT_A8P8:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isFloatFormat(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_R5G6B5:
|
case FORMAT_R8G8B8:
|
case FORMAT_B8G8R8:
|
case FORMAT_X8R8G8B8:
|
case FORMAT_X8B8G8R8I:
|
case FORMAT_X8B8G8R8:
|
case FORMAT_A8R8G8B8:
|
case FORMAT_SRGB8_X8:
|
case FORMAT_SRGB8_A8:
|
case FORMAT_A8B8G8R8I:
|
case FORMAT_R8UI:
|
case FORMAT_G8R8UI:
|
case FORMAT_X8B8G8R8UI:
|
case FORMAT_A8B8G8R8UI:
|
case FORMAT_A8B8G8R8:
|
case FORMAT_G8R8I:
|
case FORMAT_G8R8:
|
case FORMAT_A2B10G10R10:
|
case FORMAT_A2B10G10R10UI:
|
case FORMAT_R8_SNORM:
|
case FORMAT_G8R8_SNORM:
|
case FORMAT_X8B8G8R8_SNORM:
|
case FORMAT_A8B8G8R8_SNORM:
|
case FORMAT_R16I:
|
case FORMAT_R16UI:
|
case FORMAT_G16R16I:
|
case FORMAT_G16R16UI:
|
case FORMAT_G16R16:
|
case FORMAT_X16B16G16R16I:
|
case FORMAT_X16B16G16R16UI:
|
case FORMAT_A16B16G16R16I:
|
case FORMAT_A16B16G16R16UI:
|
case FORMAT_A16B16G16R16:
|
case FORMAT_V8U8:
|
case FORMAT_Q8W8V8U8:
|
case FORMAT_X8L8V8U8:
|
case FORMAT_V16U16:
|
case FORMAT_A16W16V16U16:
|
case FORMAT_Q16W16V16U16:
|
case FORMAT_A8:
|
case FORMAT_R8I:
|
case FORMAT_R8:
|
case FORMAT_S8:
|
case FORMAT_L8:
|
case FORMAT_L16:
|
case FORMAT_A8L8:
|
case FORMAT_YV12_BT601:
|
case FORMAT_YV12_BT709:
|
case FORMAT_YV12_JFIF:
|
case FORMAT_R32I:
|
case FORMAT_R32UI:
|
case FORMAT_G32R32I:
|
case FORMAT_G32R32UI:
|
case FORMAT_X32B32G32R32I:
|
case FORMAT_X32B32G32R32UI:
|
case FORMAT_A32B32G32R32I:
|
case FORMAT_A32B32G32R32UI:
|
return false;
|
case FORMAT_R16F:
|
case FORMAT_G16R16F:
|
case FORMAT_B16G16R16F:
|
case FORMAT_X16B16G16R16F:
|
case FORMAT_A16B16G16R16F:
|
case FORMAT_X16B16G16R16F_UNSIGNED:
|
case FORMAT_R32F:
|
case FORMAT_G32R32F:
|
case FORMAT_B32G32R32F:
|
case FORMAT_X32B32G32R32F:
|
case FORMAT_A32B32G32R32F:
|
case FORMAT_X32B32G32R32F_UNSIGNED:
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
case FORMAT_L16F:
|
case FORMAT_A16L16F:
|
case FORMAT_L32F:
|
case FORMAT_A32L32F:
|
return true;
|
default:
|
ASSERT(false);
|
}
|
|
return false;
|
}
|
|
bool Surface::isUnsignedComponent(Format format, int component)
|
{
|
switch(format)
|
{
|
case FORMAT_NULL:
|
case FORMAT_R5G6B5:
|
case FORMAT_R8G8B8:
|
case FORMAT_B8G8R8:
|
case FORMAT_X8R8G8B8:
|
case FORMAT_X8B8G8R8:
|
case FORMAT_A8R8G8B8:
|
case FORMAT_A8B8G8R8:
|
case FORMAT_SRGB8_X8:
|
case FORMAT_SRGB8_A8:
|
case FORMAT_G8R8:
|
case FORMAT_A2B10G10R10:
|
case FORMAT_A2B10G10R10UI:
|
case FORMAT_R16UI:
|
case FORMAT_G16R16:
|
case FORMAT_G16R16UI:
|
case FORMAT_X16B16G16R16UI:
|
case FORMAT_A16B16G16R16:
|
case FORMAT_A16B16G16R16UI:
|
case FORMAT_R32UI:
|
case FORMAT_G32R32UI:
|
case FORMAT_X32B32G32R32UI:
|
case FORMAT_A32B32G32R32UI:
|
case FORMAT_X32B32G32R32F_UNSIGNED:
|
case FORMAT_R8UI:
|
case FORMAT_G8R8UI:
|
case FORMAT_X8B8G8R8UI:
|
case FORMAT_A8B8G8R8UI:
|
case FORMAT_D32F:
|
case FORMAT_D32FS8:
|
case FORMAT_D32F_COMPLEMENTARY:
|
case FORMAT_D32FS8_COMPLEMENTARY:
|
case FORMAT_D32F_LOCKABLE:
|
case FORMAT_D32FS8_TEXTURE:
|
case FORMAT_D32F_SHADOW:
|
case FORMAT_D32FS8_SHADOW:
|
case FORMAT_A8:
|
case FORMAT_R8:
|
case FORMAT_L8:
|
case FORMAT_L16:
|
case FORMAT_A8L8:
|
case FORMAT_YV12_BT601:
|
case FORMAT_YV12_BT709:
|
case FORMAT_YV12_JFIF:
|
return true;
|
case FORMAT_A8B8G8R8I:
|
case FORMAT_A16B16G16R16I:
|
case FORMAT_A32B32G32R32I:
|
case FORMAT_A8B8G8R8_SNORM:
|
case FORMAT_Q8W8V8U8:
|
case FORMAT_Q16W16V16U16:
|
case FORMAT_A32B32G32R32F:
|
return false;
|
case FORMAT_R32F:
|
case FORMAT_R8I:
|
case FORMAT_R16I:
|
case FORMAT_R32I:
|
case FORMAT_R8_SNORM:
|
return component >= 1;
|
case FORMAT_V8U8:
|
case FORMAT_X8L8V8U8:
|
case FORMAT_V16U16:
|
case FORMAT_G32R32F:
|
case FORMAT_G8R8I:
|
case FORMAT_G16R16I:
|
case FORMAT_G32R32I:
|
case FORMAT_G8R8_SNORM:
|
return component >= 2;
|
case FORMAT_A16W16V16U16:
|
case FORMAT_B32G32R32F:
|
case FORMAT_X32B32G32R32F:
|
case FORMAT_X8B8G8R8I:
|
case FORMAT_X16B16G16R16I:
|
case FORMAT_X32B32G32R32I:
|
case FORMAT_X8B8G8R8_SNORM:
|
return component >= 3;
|
default:
|
ASSERT(false);
|
}
|
|
return false;
|
}
|
|
bool Surface::isSRGBreadable(Format format)
|
{
|
// Keep in sync with Capabilities::isSRGBreadable
|
switch(format)
|
{
|
case FORMAT_L8:
|
case FORMAT_A8L8:
|
case FORMAT_R8G8B8:
|
case FORMAT_A8R8G8B8:
|
case FORMAT_X8R8G8B8:
|
case FORMAT_A8B8G8R8:
|
case FORMAT_X8B8G8R8:
|
case FORMAT_SRGB8_X8:
|
case FORMAT_SRGB8_A8:
|
case FORMAT_R5G6B5:
|
case FORMAT_X1R5G5B5:
|
case FORMAT_A1R5G5B5:
|
case FORMAT_A4R4G4B4:
|
case FORMAT_DXT1:
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
case FORMAT_ATI1:
|
case FORMAT_ATI2:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isSRGBwritable(Format format)
|
{
|
// Keep in sync with Capabilities::isSRGBwritable
|
switch(format)
|
{
|
case FORMAT_NULL:
|
case FORMAT_A8R8G8B8:
|
case FORMAT_X8R8G8B8:
|
case FORMAT_A8B8G8R8:
|
case FORMAT_X8B8G8R8:
|
case FORMAT_SRGB8_X8:
|
case FORMAT_SRGB8_A8:
|
case FORMAT_R5G6B5:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isSRGBformat(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_SRGB8_X8:
|
case FORMAT_SRGB8_A8:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isCompressed(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_DXT1:
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
case FORMAT_ATI1:
|
case FORMAT_ATI2:
|
case FORMAT_ETC1:
|
case FORMAT_R11_EAC:
|
case FORMAT_SIGNED_R11_EAC:
|
case FORMAT_RG11_EAC:
|
case FORMAT_SIGNED_RG11_EAC:
|
case FORMAT_RGB8_ETC2:
|
case FORMAT_SRGB8_ETC2:
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_RGBA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isSignedNonNormalizedInteger(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_A8B8G8R8I:
|
case FORMAT_X8B8G8R8I:
|
case FORMAT_G8R8I:
|
case FORMAT_R8I:
|
case FORMAT_A16B16G16R16I:
|
case FORMAT_X16B16G16R16I:
|
case FORMAT_G16R16I:
|
case FORMAT_R16I:
|
case FORMAT_A32B32G32R32I:
|
case FORMAT_X32B32G32R32I:
|
case FORMAT_G32R32I:
|
case FORMAT_R32I:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isUnsignedNonNormalizedInteger(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_A8B8G8R8UI:
|
case FORMAT_X8B8G8R8UI:
|
case FORMAT_G8R8UI:
|
case FORMAT_R8UI:
|
case FORMAT_A16B16G16R16UI:
|
case FORMAT_X16B16G16R16UI:
|
case FORMAT_G16R16UI:
|
case FORMAT_R16UI:
|
case FORMAT_A32B32G32R32UI:
|
case FORMAT_X32B32G32R32UI:
|
case FORMAT_G32R32UI:
|
case FORMAT_R32UI:
|
return true;
|
default:
|
return false;
|
}
|
}
|
|
bool Surface::isNonNormalizedInteger(Format format)
|
{
|
return isSignedNonNormalizedInteger(format) ||
|
isUnsignedNonNormalizedInteger(format);
|
}
|
|
bool Surface::isNormalizedInteger(Format format)
|
{
|
return !isFloatFormat(format) &&
|
!isNonNormalizedInteger(format) &&
|
!isCompressed(format) &&
|
!isDepth(format) &&
|
!isStencil(format);
|
}
|
|
int Surface::componentCount(Format format)
|
{
|
switch(format)
|
{
|
case FORMAT_R5G6B5: return 3;
|
case FORMAT_X8R8G8B8: return 3;
|
case FORMAT_X8B8G8R8I: return 3;
|
case FORMAT_X8B8G8R8: return 3;
|
case FORMAT_A8R8G8B8: return 4;
|
case FORMAT_SRGB8_X8: return 3;
|
case FORMAT_SRGB8_A8: return 4;
|
case FORMAT_A8B8G8R8I: return 4;
|
case FORMAT_A8B8G8R8: return 4;
|
case FORMAT_G8R8I: return 2;
|
case FORMAT_G8R8: return 2;
|
case FORMAT_R8_SNORM: return 1;
|
case FORMAT_G8R8_SNORM: return 2;
|
case FORMAT_X8B8G8R8_SNORM:return 3;
|
case FORMAT_A8B8G8R8_SNORM:return 4;
|
case FORMAT_R8UI: return 1;
|
case FORMAT_G8R8UI: return 2;
|
case FORMAT_X8B8G8R8UI: return 3;
|
case FORMAT_A8B8G8R8UI: return 4;
|
case FORMAT_A2B10G10R10: return 4;
|
case FORMAT_A2B10G10R10UI: return 4;
|
case FORMAT_G16R16I: return 2;
|
case FORMAT_G16R16UI: return 2;
|
case FORMAT_G16R16: return 2;
|
case FORMAT_G32R32I: return 2;
|
case FORMAT_G32R32UI: return 2;
|
case FORMAT_X16B16G16R16I: return 3;
|
case FORMAT_X16B16G16R16UI: return 3;
|
case FORMAT_A16B16G16R16I: return 4;
|
case FORMAT_A16B16G16R16UI: return 4;
|
case FORMAT_A16B16G16R16: return 4;
|
case FORMAT_X32B32G32R32I: return 3;
|
case FORMAT_X32B32G32R32UI: return 3;
|
case FORMAT_A32B32G32R32I: return 4;
|
case FORMAT_A32B32G32R32UI: return 4;
|
case FORMAT_V8U8: return 2;
|
case FORMAT_Q8W8V8U8: return 4;
|
case FORMAT_X8L8V8U8: return 3;
|
case FORMAT_V16U16: return 2;
|
case FORMAT_A16W16V16U16: return 4;
|
case FORMAT_Q16W16V16U16: return 4;
|
case FORMAT_R32F: return 1;
|
case FORMAT_G32R32F: return 2;
|
case FORMAT_X32B32G32R32F: return 3;
|
case FORMAT_A32B32G32R32F: return 4;
|
case FORMAT_X32B32G32R32F_UNSIGNED: return 3;
|
case FORMAT_D32F: return 1;
|
case FORMAT_D32FS8: return 1;
|
case FORMAT_D32F_LOCKABLE: return 1;
|
case FORMAT_D32FS8_TEXTURE: return 1;
|
case FORMAT_D32F_SHADOW: return 1;
|
case FORMAT_D32FS8_SHADOW: return 1;
|
case FORMAT_A8: return 1;
|
case FORMAT_R8I: return 1;
|
case FORMAT_R8: return 1;
|
case FORMAT_R16I: return 1;
|
case FORMAT_R16UI: return 1;
|
case FORMAT_R32I: return 1;
|
case FORMAT_R32UI: return 1;
|
case FORMAT_L8: return 1;
|
case FORMAT_L16: return 1;
|
case FORMAT_A8L8: return 2;
|
case FORMAT_YV12_BT601: return 3;
|
case FORMAT_YV12_BT709: return 3;
|
case FORMAT_YV12_JFIF: return 3;
|
default:
|
ASSERT(false);
|
}
|
|
return 1;
|
}
|
|
void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
|
{
|
return allocate(size(width, height, depth, border, samples, format));
|
}
|
|
void Surface::memfill4(void *buffer, int pattern, int bytes)
|
{
|
while((size_t)buffer & 0x1 && bytes >= 1)
|
{
|
*(char*)buffer = (char)pattern;
|
(char*&)buffer += 1;
|
bytes -= 1;
|
}
|
|
while((size_t)buffer & 0x3 && bytes >= 2)
|
{
|
*(short*)buffer = (short)pattern;
|
(short*&)buffer += 1;
|
bytes -= 2;
|
}
|
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE())
|
{
|
while((size_t)buffer & 0xF && bytes >= 4)
|
{
|
*(int*)buffer = pattern;
|
(int*&)buffer += 1;
|
bytes -= 4;
|
}
|
|
__m128 quad = _mm_set_ps1((float&)pattern);
|
|
float *pointer = (float*)buffer;
|
int qxwords = bytes / 64;
|
bytes -= qxwords * 64;
|
|
while(qxwords--)
|
{
|
_mm_stream_ps(pointer + 0, quad);
|
_mm_stream_ps(pointer + 4, quad);
|
_mm_stream_ps(pointer + 8, quad);
|
_mm_stream_ps(pointer + 12, quad);
|
|
pointer += 16;
|
}
|
|
buffer = pointer;
|
}
|
#endif
|
|
while(bytes >= 4)
|
{
|
*(int*)buffer = (int)pattern;
|
(int*&)buffer += 1;
|
bytes -= 4;
|
}
|
|
while(bytes >= 2)
|
{
|
*(short*)buffer = (short)pattern;
|
(short*&)buffer += 1;
|
bytes -= 2;
|
}
|
|
while(bytes >= 1)
|
{
|
*(char*)buffer = (char)pattern;
|
(char*&)buffer += 1;
|
bytes -= 1;
|
}
|
}
|
|
void Surface::sync()
|
{
|
resource->lock(EXCLUSIVE);
|
resource->unlock();
|
}
|
|
bool Surface::isEntire(const Rect& rect) const
|
{
|
return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
|
}
|
|
Rect Surface::getRect() const
|
{
|
return Rect(0, 0, internal.width, internal.height);
|
}
|
|
void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
|
{
|
if(width == 0 || height == 0)
|
{
|
return;
|
}
|
|
if(internal.format == FORMAT_NULL)
|
{
|
return;
|
}
|
|
// Not overlapping
|
if(x0 > internal.width) return;
|
if(y0 > internal.height) return;
|
if(x0 + width < 0) return;
|
if(y0 + height < 0) return;
|
|
// Clip against dimensions
|
if(x0 < 0) {width += x0; x0 = 0;}
|
if(x0 + width > internal.width) width = internal.width - x0;
|
if(y0 < 0) {height += y0; y0 = 0;}
|
if(y0 + height > internal.height) height = internal.height - y0;
|
|
const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
|
const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
|
|
int x1 = x0 + width;
|
int y1 = y0 + height;
|
|
if(!hasQuadLayout(internal.format))
|
{
|
float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC);
|
|
for(int z = 0; z < internal.samples; z++)
|
{
|
float *row = target;
|
for(int y = y0; y < y1; y++)
|
{
|
memfill4(row, (int&)depth, width * sizeof(float));
|
row += internal.pitchP;
|
}
|
target += internal.sliceP;
|
}
|
|
unlockInternal();
|
}
|
else // Quad layout
|
{
|
if(complementaryDepthBuffer)
|
{
|
depth = 1 - depth;
|
}
|
|
float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
|
|
int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
|
int oddX1 = (x1 & ~1) * 2;
|
int evenX0 = ((x0 + 1) & ~1) * 2;
|
int evenBytes = (oddX1 - evenX0) * sizeof(float);
|
|
for(int z = 0; z < internal.samples; z++)
|
{
|
for(int y = y0; y < y1; y++)
|
{
|
float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2;
|
|
if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
|
{
|
if((x0 & 1) != 0)
|
{
|
target[oddX0 + 0] = depth;
|
target[oddX0 + 2] = depth;
|
}
|
|
// for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
|
// {
|
// target[x2 + 0] = depth;
|
// target[x2 + 1] = depth;
|
// target[x2 + 2] = depth;
|
// target[x2 + 3] = depth;
|
// }
|
|
// __asm
|
// {
|
// movss xmm0, depth
|
// shufps xmm0, xmm0, 0x00
|
//
|
// mov eax, x0
|
// add eax, 1
|
// and eax, 0xFFFFFFFE
|
// cmp eax, x1
|
// jge qEnd
|
//
|
// mov edi, target
|
//
|
// qLoop:
|
// movntps [edi+8*eax], xmm0
|
//
|
// add eax, 2
|
// cmp eax, x1
|
// jl qLoop
|
// qEnd:
|
// }
|
|
memfill4(&target[evenX0], (int&)depth, evenBytes);
|
|
if((x1 & 1) != 0)
|
{
|
target[oddX1 + 0] = depth;
|
target[oddX1 + 2] = depth;
|
}
|
|
y++;
|
}
|
else
|
{
|
for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
|
{
|
target[i] = depth;
|
}
|
}
|
}
|
|
buffer += internal.sliceP;
|
}
|
|
unlockInternal();
|
}
|
}
|
|
void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
|
{
|
if(mask == 0 || width == 0 || height == 0)
|
{
|
return;
|
}
|
|
if(stencil.format == FORMAT_NULL)
|
{
|
return;
|
}
|
|
// Not overlapping
|
if(x0 > internal.width) return;
|
if(y0 > internal.height) return;
|
if(x0 + width < 0) return;
|
if(y0 + height < 0) return;
|
|
// Clip against dimensions
|
if(x0 < 0) {width += x0; x0 = 0;}
|
if(x0 + width > internal.width) width = internal.width - x0;
|
if(y0 < 0) {height += y0; y0 = 0;}
|
if(y0 + height > internal.height) height = internal.height - y0;
|
|
int x1 = x0 + width;
|
int y1 = y0 + height;
|
|
int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
|
int oddX1 = (x1 & ~1) * 2;
|
int evenX0 = ((x0 + 1) & ~1) * 2;
|
int evenBytes = oddX1 - evenX0;
|
|
unsigned char maskedS = s & mask;
|
unsigned char invMask = ~mask;
|
unsigned int fill = maskedS;
|
fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
|
|
char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
|
|
// Stencil buffers are assumed to use quad layout
|
for(int z = 0; z < stencil.samples; z++)
|
{
|
for(int y = y0; y < y1; y++)
|
{
|
char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;
|
|
if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
|
{
|
if((x0 & 1) != 0)
|
{
|
target[oddX0 + 0] = fill;
|
target[oddX0 + 2] = fill;
|
}
|
|
memfill4(&target[evenX0], fill, evenBytes);
|
|
if((x1 & 1) != 0)
|
{
|
target[oddX1 + 0] = fill;
|
target[oddX1 + 2] = fill;
|
}
|
|
y++;
|
}
|
else
|
{
|
for(int x = x0; x < x1; x++)
|
{
|
int i = (x & ~1) * 2 + (x & 1);
|
target[i] = maskedS | (target[i] & invMask);
|
}
|
}
|
}
|
|
buffer += stencil.sliceP;
|
}
|
|
unlockStencil();
|
}
|
|
void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
|
{
|
unsigned char *row;
|
Buffer *buffer;
|
|
if(internal.dirty)
|
{
|
row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
|
buffer = &internal;
|
}
|
else
|
{
|
row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
|
buffer = &external;
|
}
|
|
if(buffer->bytes <= 4)
|
{
|
int c;
|
buffer->write(&c, color);
|
|
if(buffer->bytes <= 1) c = (c << 8) | c;
|
if(buffer->bytes <= 2) c = (c << 16) | c;
|
|
for(int y = 0; y < height; y++)
|
{
|
memfill4(row, c, width * buffer->bytes);
|
|
row += buffer->pitchB;
|
}
|
}
|
else // Generic
|
{
|
for(int y = 0; y < height; y++)
|
{
|
unsigned char *element = row;
|
|
for(int x = 0; x < width; x++)
|
{
|
buffer->write(element, color);
|
|
element += buffer->bytes;
|
}
|
|
row += buffer->pitchB;
|
}
|
}
|
|
if(buffer == &internal)
|
{
|
unlockInternal();
|
}
|
else
|
{
|
unlockExternal();
|
}
|
}
|
|
void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
|
{
|
ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
|
|
sw::Color<float> color;
|
|
if(!filter)
|
{
|
color = source->internal.read((int)srcX, (int)srcY, 0);
|
}
|
else // Bilinear filtering
|
{
|
color = source->internal.sample(srcX, srcY, 0);
|
}
|
|
internal.write(x, y, color);
|
}
|
|
void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
|
{
|
ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
|
|
sw::Color<float> color;
|
|
if(!filter)
|
{
|
color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
|
}
|
else // Bilinear filtering
|
{
|
color = source->internal.sample(srcX, srcY, srcZ);
|
}
|
|
internal.write(x, y, z, color);
|
}
|
|
void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
|
{
|
Surface *dst = this;
|
|
// Figure out if the edges to be copied in reverse order respectively from one another
|
// The copy should be reversed whenever the same edges are contiguous or if we're
|
// copying top <-> right or bottom <-> left. This is explained by the layout, which is:
|
//
|
// | +y |
|
// | -x | +z | +x | -z |
|
// | -y |
|
|
bool reverse = (srcEdge == dstEdge) ||
|
((srcEdge == TOP) && (dstEdge == RIGHT)) ||
|
((srcEdge == RIGHT) && (dstEdge == TOP)) ||
|
((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
|
((srcEdge == LEFT) && (dstEdge == BOTTOM));
|
|
int srcBytes = src->bytes(src->Surface::getInternalFormat());
|
int srcPitch = src->getInternalPitchB();
|
int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
|
int dstPitch = dst->getInternalPitchB();
|
|
int srcW = src->getWidth();
|
int srcH = src->getHeight();
|
int dstW = dst->getWidth();
|
int dstH = dst->getHeight();
|
|
ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
|
|
// Src is expressed in the regular [0, width-1], [0, height-1] space
|
int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
|
int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
|
|
// Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
|
int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
|
int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
|
|
char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
|
char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
|
|
for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
|
{
|
memcpy(dstBuf, srcBuf, srcBytes);
|
}
|
|
if(dstEdge == LEFT || dstEdge == RIGHT)
|
{
|
// TOP and BOTTOM are already set, let's average out the corners
|
int x0 = (dstEdge == RIGHT) ? dstW : -1;
|
int y0 = -1;
|
int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
|
int y1 = 0;
|
dst->computeCubeCorner(x0, y0, x1, y1);
|
y0 = dstH;
|
y1 = dstH - 1;
|
dst->computeCubeCorner(x0, y0, x1, y1);
|
}
|
|
src->unlockInternal();
|
dst->unlockInternal();
|
}
|
|
void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
|
{
|
ASSERT(internal.lock != LOCK_UNLOCKED);
|
|
sw::Color<float> color = internal.read(x0, y1);
|
color += internal.read(x1, y0);
|
color += internal.read(x1, y1);
|
color *= (1.0f / 3.0f);
|
|
internal.write(x0, y0, color);
|
}
|
|
bool Surface::hasStencil() const
|
{
|
return isStencil(external.format);
|
}
|
|
bool Surface::hasDepth() const
|
{
|
return isDepth(external.format);
|
}
|
|
bool Surface::hasPalette() const
|
{
|
return isPalette(external.format);
|
}
|
|
bool Surface::isRenderTarget() const
|
{
|
return renderTarget;
|
}
|
|
bool Surface::hasDirtyContents() const
|
{
|
return dirtyContents;
|
}
|
|
void Surface::markContentsClean()
|
{
|
dirtyContents = false;
|
}
|
|
Resource *Surface::getResource()
|
{
|
return resource;
|
}
|
|
bool Surface::identicalBuffers() const
|
{
|
return external.format == internal.format &&
|
external.width == internal.width &&
|
external.height == internal.height &&
|
external.depth == internal.depth &&
|
external.pitchB == internal.pitchB &&
|
external.sliceB == internal.sliceB &&
|
external.border == internal.border &&
|
external.samples == internal.samples;
|
}
|
|
Format Surface::selectInternalFormat(Format format) const
|
{
|
switch(format)
|
{
|
case FORMAT_NULL:
|
return FORMAT_NULL;
|
case FORMAT_P8:
|
case FORMAT_A8P8:
|
case FORMAT_A4R4G4B4:
|
case FORMAT_A1R5G5B5:
|
case FORMAT_A8R3G3B2:
|
return FORMAT_A8R8G8B8;
|
case FORMAT_A8:
|
return FORMAT_A8;
|
case FORMAT_R8I:
|
return FORMAT_R8I;
|
case FORMAT_R8UI:
|
return FORMAT_R8UI;
|
case FORMAT_R8_SNORM:
|
return FORMAT_R8_SNORM;
|
case FORMAT_R8:
|
return FORMAT_R8;
|
case FORMAT_R16I:
|
return FORMAT_R16I;
|
case FORMAT_R16UI:
|
return FORMAT_R16UI;
|
case FORMAT_R32I:
|
return FORMAT_R32I;
|
case FORMAT_R32UI:
|
return FORMAT_R32UI;
|
case FORMAT_X16B16G16R16I:
|
return FORMAT_X16B16G16R16I;
|
case FORMAT_A16B16G16R16I:
|
return FORMAT_A16B16G16R16I;
|
case FORMAT_X16B16G16R16UI:
|
return FORMAT_X16B16G16R16UI;
|
case FORMAT_A16B16G16R16UI:
|
return FORMAT_A16B16G16R16UI;
|
case FORMAT_A2R10G10B10:
|
case FORMAT_A2B10G10R10:
|
case FORMAT_A16B16G16R16:
|
return FORMAT_A16B16G16R16;
|
case FORMAT_A2B10G10R10UI:
|
return FORMAT_A16B16G16R16UI;
|
case FORMAT_X32B32G32R32I:
|
return FORMAT_X32B32G32R32I;
|
case FORMAT_A32B32G32R32I:
|
return FORMAT_A32B32G32R32I;
|
case FORMAT_X32B32G32R32UI:
|
return FORMAT_X32B32G32R32UI;
|
case FORMAT_A32B32G32R32UI:
|
return FORMAT_A32B32G32R32UI;
|
case FORMAT_G8R8I:
|
return FORMAT_G8R8I;
|
case FORMAT_G8R8UI:
|
return FORMAT_G8R8UI;
|
case FORMAT_G8R8_SNORM:
|
return FORMAT_G8R8_SNORM;
|
case FORMAT_G8R8:
|
return FORMAT_G8R8;
|
case FORMAT_G16R16I:
|
return FORMAT_G16R16I;
|
case FORMAT_G16R16UI:
|
return FORMAT_G16R16UI;
|
case FORMAT_G16R16:
|
return FORMAT_G16R16;
|
case FORMAT_G32R32I:
|
return FORMAT_G32R32I;
|
case FORMAT_G32R32UI:
|
return FORMAT_G32R32UI;
|
case FORMAT_A8R8G8B8:
|
if(lockable || !quadLayoutEnabled)
|
{
|
return FORMAT_A8R8G8B8;
|
}
|
else
|
{
|
return FORMAT_A8G8R8B8Q;
|
}
|
case FORMAT_A8B8G8R8I:
|
return FORMAT_A8B8G8R8I;
|
case FORMAT_A8B8G8R8UI:
|
return FORMAT_A8B8G8R8UI;
|
case FORMAT_A8B8G8R8_SNORM:
|
return FORMAT_A8B8G8R8_SNORM;
|
case FORMAT_R5G5B5A1:
|
case FORMAT_R4G4B4A4:
|
case FORMAT_A8B8G8R8:
|
return FORMAT_A8B8G8R8;
|
case FORMAT_R5G6B5:
|
return FORMAT_R5G6B5;
|
case FORMAT_R3G3B2:
|
case FORMAT_R8G8B8:
|
case FORMAT_X4R4G4B4:
|
case FORMAT_X1R5G5B5:
|
case FORMAT_X8R8G8B8:
|
if(lockable || !quadLayoutEnabled)
|
{
|
return FORMAT_X8R8G8B8;
|
}
|
else
|
{
|
return FORMAT_X8G8R8B8Q;
|
}
|
case FORMAT_X8B8G8R8I:
|
return FORMAT_X8B8G8R8I;
|
case FORMAT_X8B8G8R8UI:
|
return FORMAT_X8B8G8R8UI;
|
case FORMAT_X8B8G8R8_SNORM:
|
return FORMAT_X8B8G8R8_SNORM;
|
case FORMAT_B8G8R8:
|
case FORMAT_X8B8G8R8:
|
return FORMAT_X8B8G8R8;
|
case FORMAT_SRGB8_X8:
|
return FORMAT_SRGB8_X8;
|
case FORMAT_SRGB8_A8:
|
return FORMAT_SRGB8_A8;
|
// Compressed formats
|
case FORMAT_DXT1:
|
case FORMAT_DXT3:
|
case FORMAT_DXT5:
|
case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
case FORMAT_RGBA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
|
case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
|
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
|
return FORMAT_A8R8G8B8;
|
case FORMAT_RGBA_ASTC_4x4_KHR:
|
case FORMAT_RGBA_ASTC_5x4_KHR:
|
case FORMAT_RGBA_ASTC_5x5_KHR:
|
case FORMAT_RGBA_ASTC_6x5_KHR:
|
case FORMAT_RGBA_ASTC_6x6_KHR:
|
case FORMAT_RGBA_ASTC_8x5_KHR:
|
case FORMAT_RGBA_ASTC_8x6_KHR:
|
case FORMAT_RGBA_ASTC_8x8_KHR:
|
case FORMAT_RGBA_ASTC_10x5_KHR:
|
case FORMAT_RGBA_ASTC_10x6_KHR:
|
case FORMAT_RGBA_ASTC_10x8_KHR:
|
case FORMAT_RGBA_ASTC_10x10_KHR:
|
case FORMAT_RGBA_ASTC_12x10_KHR:
|
case FORMAT_RGBA_ASTC_12x12_KHR:
|
// ASTC supports HDR, so a floating point format is required to represent it properly
|
return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
|
case FORMAT_ATI1:
|
return FORMAT_R8;
|
case FORMAT_R11_EAC:
|
case FORMAT_SIGNED_R11_EAC:
|
return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
|
case FORMAT_ATI2:
|
return FORMAT_G8R8;
|
case FORMAT_RG11_EAC:
|
case FORMAT_SIGNED_RG11_EAC:
|
return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
|
case FORMAT_ETC1:
|
case FORMAT_RGB8_ETC2:
|
case FORMAT_SRGB8_ETC2:
|
return FORMAT_X8R8G8B8;
|
// Bumpmap formats
|
case FORMAT_V8U8: return FORMAT_V8U8;
|
case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
|
case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
|
case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
|
case FORMAT_V16U16: return FORMAT_V16U16;
|
case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
|
case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
|
// Floating-point formats
|
case FORMAT_A16F: return FORMAT_A32B32G32R32F;
|
case FORMAT_R16F: return FORMAT_R32F;
|
case FORMAT_G16R16F: return FORMAT_G32R32F;
|
case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
|
case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F;
|
case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
|
case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
|
case FORMAT_A32F: return FORMAT_A32B32G32R32F;
|
case FORMAT_R32F: return FORMAT_R32F;
|
case FORMAT_G32R32F: return FORMAT_G32R32F;
|
case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
|
case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
|
case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
|
case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
|
// Luminance formats
|
case FORMAT_L8: return FORMAT_L8;
|
case FORMAT_A4L4: return FORMAT_A8L8;
|
case FORMAT_L16: return FORMAT_L16;
|
case FORMAT_A8L8: return FORMAT_A8L8;
|
case FORMAT_L16F: return FORMAT_X32B32G32R32F;
|
case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
|
case FORMAT_L32F: return FORMAT_X32B32G32R32F;
|
case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
|
// Depth/stencil formats
|
case FORMAT_D16:
|
case FORMAT_D32:
|
case FORMAT_D24X8:
|
if(hasParent) // Texture
|
{
|
return FORMAT_D32F_SHADOW;
|
}
|
else if(complementaryDepthBuffer)
|
{
|
return FORMAT_D32F_COMPLEMENTARY;
|
}
|
else
|
{
|
return FORMAT_D32F;
|
}
|
case FORMAT_D24S8:
|
case FORMAT_D24FS8:
|
if(hasParent) // Texture
|
{
|
return FORMAT_D32FS8_SHADOW;
|
}
|
else if(complementaryDepthBuffer)
|
{
|
return FORMAT_D32FS8_COMPLEMENTARY;
|
}
|
else
|
{
|
return FORMAT_D32FS8;
|
}
|
case FORMAT_D32F: return FORMAT_D32F;
|
case FORMAT_D32FS8: return FORMAT_D32FS8;
|
case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
|
case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
|
case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
|
case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
|
case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
|
case FORMAT_S8: return FORMAT_S8;
|
// YUV formats
|
case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
|
case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
|
case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
|
default:
|
ASSERT(false);
|
}
|
|
return FORMAT_NULL;
|
}
|
|
void Surface::setTexturePalette(unsigned int *palette)
|
{
|
Surface::palette = palette;
|
Surface::paletteID++;
|
}
|
|
void Surface::resolve()
|
{
|
if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
|
{
|
return;
|
}
|
|
ASSERT(internal.depth == 1); // Unimplemented
|
|
void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
|
|
int width = internal.width;
|
int height = internal.height;
|
int pitch = internal.pitchB;
|
int slice = internal.sliceB;
|
|
unsigned char *source0 = (unsigned char*)source;
|
unsigned char *source1 = source0 + slice;
|
unsigned char *source2 = source1 + slice;
|
unsigned char *source3 = source2 + slice;
|
unsigned char *source4 = source3 + slice;
|
unsigned char *source5 = source4 + slice;
|
unsigned char *source6 = source5 + slice;
|
unsigned char *source7 = source6 + slice;
|
unsigned char *source8 = source7 + slice;
|
unsigned char *source9 = source8 + slice;
|
unsigned char *sourceA = source9 + slice;
|
unsigned char *sourceB = sourceA + slice;
|
unsigned char *sourceC = sourceB + slice;
|
unsigned char *sourceD = sourceC + slice;
|
unsigned char *sourceE = sourceD + slice;
|
unsigned char *sourceF = sourceE + slice;
|
|
if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
|
internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
|
internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE2() && (width % 4) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
|
c0 = _mm_avg_epu8(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
|
c0 = _mm_avg_epu8(c0, c1);
|
c2 = _mm_avg_epu8(c2, c3);
|
c0 = _mm_avg_epu8(c0, c2);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
|
|
c0 = _mm_avg_epu8(c0, c1);
|
c2 = _mm_avg_epu8(c2, c3);
|
c4 = _mm_avg_epu8(c4, c5);
|
c6 = _mm_avg_epu8(c6, c7);
|
c0 = _mm_avg_epu8(c0, c2);
|
c4 = _mm_avg_epu8(c4, c6);
|
c0 = _mm_avg_epu8(c0, c4);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
|
__m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
|
__m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
|
__m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
|
__m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
|
__m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
|
__m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
|
__m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
|
__m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
|
|
c0 = _mm_avg_epu8(c0, c1);
|
c2 = _mm_avg_epu8(c2, c3);
|
c4 = _mm_avg_epu8(c4, c5);
|
c6 = _mm_avg_epu8(c6, c7);
|
c8 = _mm_avg_epu8(c8, c9);
|
cA = _mm_avg_epu8(cA, cB);
|
cC = _mm_avg_epu8(cC, cD);
|
cE = _mm_avg_epu8(cE, cF);
|
c0 = _mm_avg_epu8(c0, c2);
|
c4 = _mm_avg_epu8(c4, c6);
|
c8 = _mm_avg_epu8(c8, cA);
|
cC = _mm_avg_epu8(cC, cE);
|
c0 = _mm_avg_epu8(c0, c4);
|
c8 = _mm_avg_epu8(c8, cC);
|
c0 = _mm_avg_epu8(c0, c8);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
|
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c0 = AVERAGE(c0, c2);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c0 = AVERAGE(c0, c4);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
|
unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
|
unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
|
unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
|
unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
|
unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
|
unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
|
unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c8 = AVERAGE(c8, c9);
|
cA = AVERAGE(cA, cB);
|
cC = AVERAGE(cC, cD);
|
cE = AVERAGE(cE, cF);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c8 = AVERAGE(c8, cA);
|
cC = AVERAGE(cC, cE);
|
c0 = AVERAGE(c0, c4);
|
c8 = AVERAGE(c8, cC);
|
c0 = AVERAGE(c0, c8);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
|
#undef AVERAGE
|
}
|
}
|
else if(internal.format == FORMAT_G16R16)
|
{
|
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE2() && (width % 4) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c0 = _mm_avg_epu16(c0, c2);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c4 = _mm_avg_epu16(c4, c5);
|
c6 = _mm_avg_epu16(c6, c7);
|
c0 = _mm_avg_epu16(c0, c2);
|
c4 = _mm_avg_epu16(c4, c6);
|
c0 = _mm_avg_epu16(c0, c4);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
|
__m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
|
__m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
|
__m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
|
__m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
|
__m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
|
__m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
|
__m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
|
__m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c4 = _mm_avg_epu16(c4, c5);
|
c6 = _mm_avg_epu16(c6, c7);
|
c8 = _mm_avg_epu16(c8, c9);
|
cA = _mm_avg_epu16(cA, cB);
|
cC = _mm_avg_epu16(cC, cD);
|
cE = _mm_avg_epu16(cE, cF);
|
c0 = _mm_avg_epu16(c0, c2);
|
c4 = _mm_avg_epu16(c4, c6);
|
c8 = _mm_avg_epu16(c8, cA);
|
cC = _mm_avg_epu16(cC, cE);
|
c0 = _mm_avg_epu16(c0, c4);
|
c8 = _mm_avg_epu16(c8, cC);
|
c0 = _mm_avg_epu16(c0, c8);
|
|
_mm_store_si128((__m128i*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
|
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c0 = AVERAGE(c0, c2);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c0 = AVERAGE(c0, c4);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
|
unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
|
unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
|
unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
|
unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
|
unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
|
unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
|
unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c8 = AVERAGE(c8, c9);
|
cA = AVERAGE(cA, cB);
|
cC = AVERAGE(cC, cD);
|
cE = AVERAGE(cE, cF);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c8 = AVERAGE(c8, cA);
|
cC = AVERAGE(cC, cE);
|
c0 = AVERAGE(c0, c4);
|
c8 = AVERAGE(c8, cC);
|
c0 = AVERAGE(c0, c8);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
|
#undef AVERAGE
|
}
|
}
|
else if(internal.format == FORMAT_A16B16G16R16)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE2() && (width % 2) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c0 = _mm_avg_epu16(c0, c2);
|
|
_mm_store_si128((__m128i*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c4 = _mm_avg_epu16(c4, c5);
|
c6 = _mm_avg_epu16(c6, c7);
|
c0 = _mm_avg_epu16(c0, c2);
|
c4 = _mm_avg_epu16(c4, c6);
|
c0 = _mm_avg_epu16(c0, c4);
|
|
_mm_store_si128((__m128i*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
|
__m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
|
__m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
|
__m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
|
__m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
|
__m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
|
__m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
|
__m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
|
__m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
|
|
c0 = _mm_avg_epu16(c0, c1);
|
c2 = _mm_avg_epu16(c2, c3);
|
c4 = _mm_avg_epu16(c4, c5);
|
c6 = _mm_avg_epu16(c6, c7);
|
c8 = _mm_avg_epu16(c8, c9);
|
cA = _mm_avg_epu16(cA, cB);
|
cC = _mm_avg_epu16(cC, cD);
|
cE = _mm_avg_epu16(cE, cF);
|
c0 = _mm_avg_epu16(c0, c2);
|
c4 = _mm_avg_epu16(c4, c6);
|
c8 = _mm_avg_epu16(c8, cA);
|
cC = _mm_avg_epu16(cC, cE);
|
c0 = _mm_avg_epu16(c0, c4);
|
c8 = _mm_avg_epu16(c8, cC);
|
c0 = _mm_avg_epu16(c0, c8);
|
|
_mm_store_si128((__m128i*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
|
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c0 = AVERAGE(c0, c2);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c0 = AVERAGE(c0, c4);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
|
unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
|
unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
|
unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
|
unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
|
unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
|
unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
|
unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
|
unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
|
unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
|
unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
|
unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
|
unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
|
unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
|
unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
|
unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c8 = AVERAGE(c8, c9);
|
cA = AVERAGE(cA, cB);
|
cC = AVERAGE(cC, cD);
|
cE = AVERAGE(cE, cF);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c8 = AVERAGE(c8, cA);
|
cC = AVERAGE(cC, cE);
|
c0 = AVERAGE(c0, c4);
|
c8 = AVERAGE(c8, cC);
|
c0 = AVERAGE(c0, c8);
|
|
*(unsigned int*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
|
#undef AVERAGE
|
}
|
}
|
else if(internal.format == FORMAT_R32F)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE() && (width % 4) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
|
|
_mm_store_ps((float*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c0 = _mm_add_ps(c0, c2);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
|
|
_mm_store_ps((float*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c0 = _mm_add_ps(c0, c4);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
|
|
_mm_store_ps((float*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 4)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
|
__m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
|
__m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
|
__m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
|
__m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
|
__m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
|
__m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
|
__m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
|
__m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c8 = _mm_add_ps(c8, c9);
|
cA = _mm_add_ps(cA, cB);
|
cC = _mm_add_ps(cC, cD);
|
cE = _mm_add_ps(cE, cF);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c8 = _mm_add_ps(c8, cA);
|
cC = _mm_add_ps(cC, cE);
|
c0 = _mm_add_ps(c0, c4);
|
c8 = _mm_add_ps(c8, cC);
|
c0 = _mm_add_ps(c0, c8);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
|
|
_mm_store_ps((float*)(source0 + 4 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
|
c0 = c0 + c1;
|
c0 *= 1.0f / 2.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c0 = c0 + c2;
|
c0 *= 1.0f / 4.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c0 = c0 + c4;
|
c0 *= 1.0f / 8.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
float c8 = *(float*)(source8 + 4 * x);
|
float c9 = *(float*)(source9 + 4 * x);
|
float cA = *(float*)(sourceA + 4 * x);
|
float cB = *(float*)(sourceB + 4 * x);
|
float cC = *(float*)(sourceC + 4 * x);
|
float cD = *(float*)(sourceD + 4 * x);
|
float cE = *(float*)(sourceE + 4 * x);
|
float cF = *(float*)(sourceF + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c8 = c8 + c9;
|
cA = cA + cB;
|
cC = cC + cD;
|
cE = cE + cF;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c8 = c8 + cA;
|
cC = cC + cE;
|
c0 = c0 + c4;
|
c8 = c8 + cC;
|
c0 = c0 + c8;
|
c0 *= 1.0f / 16.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
}
|
else if(internal.format == FORMAT_G32R32F)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE() && (width % 2) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
|
|
_mm_store_ps((float*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c0 = _mm_add_ps(c0, c2);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
|
|
_mm_store_ps((float*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c0 = _mm_add_ps(c0, c4);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
|
|
_mm_store_ps((float*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 2)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
|
__m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
|
__m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
|
__m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
|
__m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
|
__m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
|
__m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
|
__m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
|
__m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c8 = _mm_add_ps(c8, c9);
|
cA = _mm_add_ps(cA, cB);
|
cC = _mm_add_ps(cC, cD);
|
cE = _mm_add_ps(cE, cF);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c8 = _mm_add_ps(c8, cA);
|
cC = _mm_add_ps(cC, cE);
|
c0 = _mm_add_ps(c0, c4);
|
c8 = _mm_add_ps(c8, cC);
|
c0 = _mm_add_ps(c0, c8);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
|
|
_mm_store_ps((float*)(source0 + 8 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
|
c0 = c0 + c1;
|
c0 *= 1.0f / 2.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c0 = c0 + c2;
|
c0 *= 1.0f / 4.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c0 = c0 + c4;
|
c0 *= 1.0f / 8.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 2 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
float c8 = *(float*)(source8 + 4 * x);
|
float c9 = *(float*)(source9 + 4 * x);
|
float cA = *(float*)(sourceA + 4 * x);
|
float cB = *(float*)(sourceB + 4 * x);
|
float cC = *(float*)(sourceC + 4 * x);
|
float cD = *(float*)(sourceD + 4 * x);
|
float cE = *(float*)(sourceE + 4 * x);
|
float cF = *(float*)(sourceF + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c8 = c8 + c9;
|
cA = cA + cB;
|
cC = cC + cD;
|
cE = cE + cF;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c8 = c8 + cA;
|
cC = cC + cE;
|
c0 = c0 + c4;
|
c8 = c8 + cC;
|
c0 = c0 + c8;
|
c0 *= 1.0f / 16.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
}
|
else if(internal.format == FORMAT_A32B32G32R32F ||
|
internal.format == FORMAT_X32B32G32R32F ||
|
internal.format == FORMAT_X32B32G32R32F_UNSIGNED)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE())
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
|
|
_mm_store_ps((float*)(source0 + 16 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c0 = _mm_add_ps(c0, c2);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
|
|
_mm_store_ps((float*)(source0 + 16 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c0 = _mm_add_ps(c0, c4);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
|
|
_mm_store_ps((float*)(source0 + 16 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
__m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
|
__m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
|
__m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
|
__m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
|
__m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
|
__m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
|
__m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
|
__m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
|
__m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
|
__m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
|
__m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
|
__m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
|
__m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
|
__m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
|
__m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
|
__m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
|
|
c0 = _mm_add_ps(c0, c1);
|
c2 = _mm_add_ps(c2, c3);
|
c4 = _mm_add_ps(c4, c5);
|
c6 = _mm_add_ps(c6, c7);
|
c8 = _mm_add_ps(c8, c9);
|
cA = _mm_add_ps(cA, cB);
|
cC = _mm_add_ps(cC, cD);
|
cE = _mm_add_ps(cE, cF);
|
c0 = _mm_add_ps(c0, c2);
|
c4 = _mm_add_ps(c4, c6);
|
c8 = _mm_add_ps(c8, cA);
|
cC = _mm_add_ps(cC, cE);
|
c0 = _mm_add_ps(c0, c4);
|
c8 = _mm_add_ps(c8, cC);
|
c0 = _mm_add_ps(c0, c8);
|
c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
|
|
_mm_store_ps((float*)(source0 + 16 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 4 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
|
c0 = c0 + c1;
|
c0 *= 1.0f / 2.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 4 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c0 = c0 + c2;
|
c0 *= 1.0f / 4.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 4 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c0 = c0 + c4;
|
c0 *= 1.0f / 8.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < 4 * width; x++)
|
{
|
float c0 = *(float*)(source0 + 4 * x);
|
float c1 = *(float*)(source1 + 4 * x);
|
float c2 = *(float*)(source2 + 4 * x);
|
float c3 = *(float*)(source3 + 4 * x);
|
float c4 = *(float*)(source4 + 4 * x);
|
float c5 = *(float*)(source5 + 4 * x);
|
float c6 = *(float*)(source6 + 4 * x);
|
float c7 = *(float*)(source7 + 4 * x);
|
float c8 = *(float*)(source8 + 4 * x);
|
float c9 = *(float*)(source9 + 4 * x);
|
float cA = *(float*)(sourceA + 4 * x);
|
float cB = *(float*)(sourceB + 4 * x);
|
float cC = *(float*)(sourceC + 4 * x);
|
float cD = *(float*)(sourceD + 4 * x);
|
float cE = *(float*)(sourceE + 4 * x);
|
float cF = *(float*)(sourceF + 4 * x);
|
|
c0 = c0 + c1;
|
c2 = c2 + c3;
|
c4 = c4 + c5;
|
c6 = c6 + c7;
|
c8 = c8 + c9;
|
cA = cA + cB;
|
cC = cC + cD;
|
cE = cE + cF;
|
c0 = c0 + c2;
|
c4 = c4 + c6;
|
c8 = c8 + cA;
|
cC = cC + cE;
|
c0 = c0 + c4;
|
c8 = c8 + cC;
|
c0 = c0 + c8;
|
c0 *= 1.0f / 16.0f;
|
|
*(float*)(source0 + 4 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
}
|
else if(internal.format == FORMAT_R5G6B5)
|
{
|
#if defined(__i386__) || defined(__x86_64__)
|
if(CPUID::supportsSSE2() && (width % 8) == 0)
|
{
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 8)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
|
|
static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
|
static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
|
__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
|
c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
|
c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
c1 = _mm_avg_epu16(c0__g_, c1__g_);
|
c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
c0 = _mm_or_si128(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 8)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
|
|
static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
|
static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
|
__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
|
|
c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
|
c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
|
c0 = _mm_avg_epu8(c0, c2);
|
c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
c1 = _mm_avg_epu16(c0__g_, c1__g_);
|
c3 = _mm_avg_epu16(c2__g_, c3__g_);
|
c1 = _mm_avg_epu16(c1, c3);
|
c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
c0 = _mm_or_si128(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 8)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
|
|
static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
|
static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
|
__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
|
|
c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
|
c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
|
c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
|
c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
|
c0 = _mm_avg_epu8(c0, c2);
|
c4 = _mm_avg_epu8(c4, c6);
|
c0 = _mm_avg_epu8(c0, c4);
|
c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
c1 = _mm_avg_epu16(c0__g_, c1__g_);
|
c3 = _mm_avg_epu16(c2__g_, c3__g_);
|
c5 = _mm_avg_epu16(c4__g_, c5__g_);
|
c7 = _mm_avg_epu16(c6__g_, c7__g_);
|
c1 = _mm_avg_epu16(c1, c3);
|
c5 = _mm_avg_epu16(c5, c7);
|
c1 = _mm_avg_epu16(c1, c5);
|
c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
c0 = _mm_or_si128(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x += 8)
|
{
|
__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
|
__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
|
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
|
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
|
__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
|
__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
|
__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
|
__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
|
__m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
|
__m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
|
__m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
|
__m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
|
__m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
|
__m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
|
__m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
|
__m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
|
|
static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
|
static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
|
__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
|
__m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
|
__m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
|
__m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
|
__m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
|
|
c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
|
c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
|
c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
|
c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
|
c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
|
cA = _mm_avg_epu8(cA_r_b, cB_r_b);
|
cC = _mm_avg_epu8(cC_r_b, cD_r_b);
|
cE = _mm_avg_epu8(cE_r_b, cF_r_b);
|
c0 = _mm_avg_epu8(c0, c2);
|
c4 = _mm_avg_epu8(c4, c6);
|
c8 = _mm_avg_epu8(c8, cA);
|
cC = _mm_avg_epu8(cC, cE);
|
c0 = _mm_avg_epu8(c0, c4);
|
c8 = _mm_avg_epu8(c8, cC);
|
c0 = _mm_avg_epu8(c0, c8);
|
c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
|
c1 = _mm_avg_epu16(c0__g_, c1__g_);
|
c3 = _mm_avg_epu16(c2__g_, c3__g_);
|
c5 = _mm_avg_epu16(c4__g_, c5__g_);
|
c7 = _mm_avg_epu16(c6__g_, c7__g_);
|
c9 = _mm_avg_epu16(c8__g_, c9__g_);
|
cB = _mm_avg_epu16(cA__g_, cB__g_);
|
cD = _mm_avg_epu16(cC__g_, cD__g_);
|
cF = _mm_avg_epu16(cE__g_, cF__g_);
|
c1 = _mm_avg_epu8(c1, c3);
|
c5 = _mm_avg_epu8(c5, c7);
|
c9 = _mm_avg_epu8(c9, cB);
|
cD = _mm_avg_epu8(cD, cF);
|
c1 = _mm_avg_epu8(c1, c5);
|
c9 = _mm_avg_epu8(c9, cD);
|
c1 = _mm_avg_epu8(c1, c9);
|
c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
|
c0 = _mm_or_si128(c0, c1);
|
|
_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
}
|
else
|
#endif
|
{
|
#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
|
|
if(internal.samples == 2)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
|
unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
|
|
c0 = AVERAGE(c0, c1);
|
|
*(unsigned short*)(source0 + 2 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
}
|
}
|
else if(internal.samples == 4)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
|
unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
|
unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
|
unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c0 = AVERAGE(c0, c2);
|
|
*(unsigned short*)(source0 + 2 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
}
|
}
|
else if(internal.samples == 8)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
|
unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
|
unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
|
unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
|
unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
|
unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
|
unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
|
unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c0 = AVERAGE(c0, c4);
|
|
*(unsigned short*)(source0 + 2 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
}
|
}
|
else if(internal.samples == 16)
|
{
|
for(int y = 0; y < height; y++)
|
{
|
for(int x = 0; x < width; x++)
|
{
|
unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
|
unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
|
unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
|
unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
|
unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
|
unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
|
unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
|
unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
|
unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
|
unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
|
unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
|
unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
|
unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
|
unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
|
unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
|
unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
|
|
c0 = AVERAGE(c0, c1);
|
c2 = AVERAGE(c2, c3);
|
c4 = AVERAGE(c4, c5);
|
c6 = AVERAGE(c6, c7);
|
c8 = AVERAGE(c8, c9);
|
cA = AVERAGE(cA, cB);
|
cC = AVERAGE(cC, cD);
|
cE = AVERAGE(cE, cF);
|
c0 = AVERAGE(c0, c2);
|
c4 = AVERAGE(c4, c6);
|
c8 = AVERAGE(c8, cA);
|
cC = AVERAGE(cC, cE);
|
c0 = AVERAGE(c0, c4);
|
c8 = AVERAGE(c8, cC);
|
c0 = AVERAGE(c0, c8);
|
|
*(unsigned short*)(source0 + 2 * x) = c0;
|
}
|
|
source0 += pitch;
|
source1 += pitch;
|
source2 += pitch;
|
source3 += pitch;
|
source4 += pitch;
|
source5 += pitch;
|
source6 += pitch;
|
source7 += pitch;
|
source8 += pitch;
|
source9 += pitch;
|
sourceA += pitch;
|
sourceB += pitch;
|
sourceC += pitch;
|
sourceD += pitch;
|
sourceE += pitch;
|
sourceF += pitch;
|
}
|
}
|
else ASSERT(false);
|
|
#undef AVERAGE
|
}
|
}
|
else
|
{
|
// UNIMPLEMENTED();
|
}
|
}
|
}
|