// This file is part of OpenCV project.
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// of this distribution and at http://opencv.org/license.html
|
|
#ifndef CV__SIMD_FORWARD
|
#error "Need to pre-define forward width"
|
#endif
|
|
namespace cv
|
{
|
|
//! @cond IGNORED
|
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
|
/** Types **/
|
#if CV__SIMD_FORWARD == 512
|
// [todo] 512
|
#error "AVX512 Not implemented yet"
|
#elif CV__SIMD_FORWARD == 256
|
// 256
|
#define __CV_VX(fun) v256_##fun
|
#define __CV_V_UINT8 v_uint8x32
|
#define __CV_V_INT8 v_int8x32
|
#define __CV_V_UINT16 v_uint16x16
|
#define __CV_V_INT16 v_int16x16
|
#define __CV_V_UINT32 v_uint32x8
|
#define __CV_V_INT32 v_int32x8
|
#define __CV_V_UINT64 v_uint64x4
|
#define __CV_V_INT64 v_int64x4
|
#define __CV_V_FLOAT32 v_float32x8
|
#define __CV_V_FLOAT64 v_float64x4
|
struct v_uint8x32;
|
struct v_int8x32;
|
struct v_uint16x16;
|
struct v_int16x16;
|
struct v_uint32x8;
|
struct v_int32x8;
|
struct v_uint64x4;
|
struct v_int64x4;
|
struct v_float32x8;
|
struct v_float64x4;
|
#else
|
// 128
|
#define __CV_VX(fun) v_##fun
|
#define __CV_V_UINT8 v_uint8x16
|
#define __CV_V_INT8 v_int8x16
|
#define __CV_V_UINT16 v_uint16x8
|
#define __CV_V_INT16 v_int16x8
|
#define __CV_V_UINT32 v_uint32x4
|
#define __CV_V_INT32 v_int32x4
|
#define __CV_V_UINT64 v_uint64x2
|
#define __CV_V_INT64 v_int64x2
|
#define __CV_V_FLOAT32 v_float32x4
|
#define __CV_V_FLOAT64 v_float64x2
|
struct v_uint8x16;
|
struct v_int8x16;
|
struct v_uint16x8;
|
struct v_int16x8;
|
struct v_uint32x4;
|
struct v_int32x4;
|
struct v_uint64x2;
|
struct v_int64x2;
|
struct v_float32x4;
|
struct v_float64x2;
|
#endif
|
|
/** Value reordering **/
|
|
// Expansion
|
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
// Low Expansion
|
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
|
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
|
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
|
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
|
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
|
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
|
// High Expansion
|
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
|
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
|
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
|
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
|
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
|
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
|
// Load & Low Expansion
|
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
|
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
|
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
|
__CV_V_INT32 __CV_VX(load_expand)(const short*);
|
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
|
__CV_V_INT64 __CV_VX(load_expand)(const int*);
|
// Load lower 8-bit and expand into 32-bit
|
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
|
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
|
|
// Saturating Pack
|
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
|
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
|
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
|
// Non-saturating Pack
|
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
|
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
|
// Pack signed integers with unsigned saturation
|
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
|
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
|
|
/** Arithmetic, bitwise and comparison operations **/
|
|
// Non-saturating multiply
|
#if CV_VSX
|
template<typename Tvec>
|
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
|
#else
|
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
|
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
|
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
|
#endif
|
|
// Multiply and expand
|
#if CV_VSX
|
template<typename Tvec, typename Twvec>
|
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
|
#else
|
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
#endif
|
|
/** Cleanup **/
|
#undef CV__SIMD_FORWARD
|
#undef __CV_VX
|
#undef __CV_V_UINT8
|
#undef __CV_V_INT8
|
#undef __CV_V_UINT16
|
#undef __CV_V_INT16
|
#undef __CV_V_UINT32
|
#undef __CV_V_INT32
|
#undef __CV_V_UINT64
|
#undef __CV_V_INT64
|
#undef __CV_V_FLOAT32
|
#undef __CV_V_FLOAT64
|
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
|
//! @endcond
|
|
} // cv::
|