/* -----------------------------------------------------------------------------
|
Software License for The Fraunhofer FDK AAC Codec Library for Android
|
|
© Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
|
Forschung e.V. All rights reserved.
|
|
1. INTRODUCTION
|
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
|
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
|
scheme for digital audio. This FDK AAC Codec software is intended to be used on
|
a wide variety of Android devices.
|
|
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
|
general perceptual audio codecs. AAC-ELD is considered the best-performing
|
full-bandwidth communications codec by independent studies and is widely
|
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
|
specifications.
|
|
Patent licenses for necessary patent claims for the FDK AAC Codec (including
|
those of Fraunhofer) may be obtained through Via Licensing
|
(www.vialicensing.com) or through the respective patent owners individually for
|
the purpose of encoding or decoding bit streams in products that are compliant
|
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
|
Android devices already license these patent claims through Via Licensing or
|
directly from the patent owners, and therefore FDK AAC Codec software may
|
already be covered under those patent licenses when it is used for those
|
licensed purposes only.
|
|
Commercially-licensed AAC software libraries, including floating-point versions
|
with enhanced sound quality, are also available from Fraunhofer. Users are
|
encouraged to check the Fraunhofer website for additional applications
|
information and documentation.
|
|
2. COPYRIGHT LICENSE
|
|
Redistribution and use in source and binary forms, with or without modification,
|
are permitted without payment of copyright license fees provided that you
|
satisfy the following conditions:
|
|
You must retain the complete text of this software license in redistributions of
|
the FDK AAC Codec or your modifications thereto in source code form.
|
|
You must retain the complete text of this software license in the documentation
|
and/or other materials provided with redistributions of the FDK AAC Codec or
|
your modifications thereto in binary form. You must make available free of
|
charge copies of the complete source code of the FDK AAC Codec and your
|
modifications thereto to recipients of copies in binary form.
|
|
The name of Fraunhofer may not be used to endorse or promote products derived
|
from this library without prior written permission.
|
|
You may not charge copyright license fees for anyone to use, copy or distribute
|
the FDK AAC Codec software or your modifications thereto.
|
|
Your modified versions of the FDK AAC Codec must carry prominent notices stating
|
that you changed the software and the date of any change. For modified versions
|
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
|
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
|
AAC Codec Library for Android."
|
|
3. NO PATENT LICENSE
|
|
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
|
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
|
Fraunhofer provides no warranty of patent non-infringement with respect to this
|
software.
|
|
You may use this FDK AAC Codec software or modifications thereto only for
|
purposes that are authorized by appropriate patent licenses.
|
|
4. DISCLAIMER
|
|
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
|
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
|
including but not limited to the implied warranties of merchantability and
|
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
|
or consequential damages, including but not limited to procurement of substitute
|
goods or services; loss of use, data, or profits, or business interruption,
|
however caused and on any theory of liability, whether in contract, strict
|
liability, or tort (including negligence), arising in any way out of the use of
|
this software, even if advised of the possibility of such damage.
|
|
5. CONTACT INFORMATION
|
|
Fraunhofer Institute for Integrated Circuits IIS
|
Attention: Audio and Multimedia Departments - FDK AAC LL
|
Am Wolfsmantel 33
|
91058 Erlangen, Germany
|
|
www.iis.fraunhofer.de/amm
|
amm-info@iis.fraunhofer.de
|
----------------------------------------------------------------------------- */
|
|
/******************* Library for basic calculation routines ********************
|
|
Author(s): Josef Hoepfl, DSP Solutions
|
|
Description: Fix point FFT
|
|
*******************************************************************************/
|
|
#include "fft_rad2.h"
|
#include "FDK_tools_rom.h"
|
|
#define W_PiFOURTH STC(0x5a82799a)
|
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
|
#ifndef SUMDIFF_PIFOURTH
|
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
|
{ \
|
FIXP_DBL wa, wb; \
|
wa = fMultDiv2(a, W_PiFOURTH); \
|
wb = fMultDiv2(b, W_PiFOURTH); \
|
diff = wb - wa; \
|
sum = wb + wa; \
|
}
|
#define SUMDIFF_PIFOURTH16(diff, sum, a, b) \
|
{ \
|
FIXP_SGL wa, wb; \
|
wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
|
wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
|
diff = wb - wa; \
|
sum = wb + wa; \
|
}
|
#endif
|
|
#define SCALEFACTOR2048 10
|
#define SCALEFACTOR1024 9
|
#define SCALEFACTOR512 8
|
#define SCALEFACTOR256 7
|
#define SCALEFACTOR128 6
|
#define SCALEFACTOR64 5
|
#define SCALEFACTOR32 4
|
#define SCALEFACTOR16 3
|
#define SCALEFACTOR8 2
|
#define SCALEFACTOR4 1
|
#define SCALEFACTOR2 1
|
|
#define SCALEFACTOR3 1
|
#define SCALEFACTOR5 1
|
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
|
#define SCALEFACTOR7 2
|
#define SCALEFACTOR9 2
|
#define SCALEFACTOR10 5
|
#define SCALEFACTOR12 3
|
#define SCALEFACTOR15 3
|
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
|
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
|
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
|
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
|
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
|
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
|
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
|
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
|
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
|
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
|
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
|
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
|
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
|
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
|
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
|
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
|
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
|
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
|
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
|
|
#include "fft.h"
|
|
#ifndef FUNCTION_fft2
|
|
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
|
* with factor 0.5 */
|
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
  /* Load both complex input samples: a = pDat[0..1], b = pDat[2..3]. */
  const FIXP_DBL aRe = pDat[0];
  const FIXP_DBL aIm = pDat[1];
  const FIXP_DBL bRe = pDat[2];
  const FIXP_DBL bIm = pDat[3];

  /* Length-2 butterfly: X0 = (a + b)/2, X1 = (a - b)/2.
     The '>> 1' applies the documented 0.5 output scaling and prevents
     overflow of the sum/difference. */
  pDat[0] = (aRe + bRe) >> 1;
  pDat[1] = (aIm + bIm) >> 1;
  pDat[2] = (aRe - bRe) >> 1;
  pDat[3] = (aIm - bIm) >> 1;
}
|
#endif /* FUNCTION_fft2 */
|
|
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */
|
|
#ifndef FUNCTION_fft3
|
/* Performs the FFT of length 3 according to the algorithm after winograd. */
|
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
  /* Real part: butterfly of samples 1 and 2, then fold in the DC sample.
     Shifts reproduce the exact scaling of the reference implementation. */
  FIXP_DBL sumRe = pDat[2] + pDat[4];
  FIXP_DBL rotRe = fMultDiv2((pDat[2] - pDat[4]), C31);
  FIXP_DBL half = pDat[0] >> 1;
  pDat[0] = half + (sumRe >> 1);
  sumRe = half - (sumRe >> 2);

  /* Imaginary part: identical structure to the real part. */
  FIXP_DBL sumIm = pDat[3] + pDat[5];
  FIXP_DBL rotIm = fMultDiv2((pDat[3] - pDat[5]), C31);
  half = pDat[1] >> 1;
  pDat[1] = half + (sumIm >> 1);
  sumIm = half - (sumIm >> 2);

  /* Combination: the C31-rotated difference terms couple the real and
     imaginary outputs. */
  pDat[2] = sumRe - rotIm;
  pDat[4] = sumRe + rotIm;
  pDat[3] = sumIm + rotRe;
  pDat[5] = sumIm - rotRe;
}
|
#endif /* #ifndef FUNCTION_fft3 */
|
|
#define F5C(x) STC(x)
|
|
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
|
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
|
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
|
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
|
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
|
|
/* Performs the FFT of length 5 according to the algorithm after Winograd.
   In-place on pDat: 5 interleaved complex values (re, im) in pDat[0..9].
   This version works with a prescale of 2 instead of 3 (inputs are halved
   once here via '>> 1'; constants C52/C55 carry an extra 0.5 scaling that
   is compensated by '<< 2' after fMultDiv2). */
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
  FIXP_DBL r1, r2, r3, r4;
  FIXP_DBL s1, s2, s3, s4;
  FIXP_DBL t;

  /* real part */
  r1 = (pDat[2] + pDat[8]) >> 1;
  r4 = (pDat[2] - pDat[8]) >> 1;
  r3 = (pDat[4] + pDat[6]) >> 1;
  r2 = (pDat[4] - pDat[6]) >> 1;
  t = fMult((r1 - r3), C54);
  r1 = r1 + r3;
  pDat[0] = (pDat[0] >> 1) + r1; /* DC output = sum of all real inputs */
  /* Bit shift left because of the constant C55 which was scaled with the factor
     0.5 because of the representation of the values as fracts */
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
  r3 = r1 - t;
  r1 = r1 + t;
  t = fMult((r4 + r2), C51);
  /* Bit shift left because of the constant C55 which was scaled with the factor
     0.5 because of the representation of the values as fracts */
  r4 = t + (fMultDiv2(r4, C52) << (2));
  r2 = t + fMult(r2, C53);

  /* imaginary part — same Winograd structure as the real part */
  s1 = (pDat[3] + pDat[9]) >> 1;
  s4 = (pDat[3] - pDat[9]) >> 1;
  s3 = (pDat[5] + pDat[7]) >> 1;
  s2 = (pDat[5] - pDat[7]) >> 1;
  t = fMult((s1 - s3), C54);
  s1 = s1 + s3;
  pDat[1] = (pDat[1] >> 1) + s1;
  /* Bit shift left because of the constant C55 which was scaled with the factor
     0.5 because of the representation of the values as fracts */
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
  s3 = s1 - t;
  s1 = s1 + t;
  t = fMult((s4 + s2), C51);
  /* Bit shift left because of the constant C55 which was scaled with the factor
     0.5 because of the representation of the values as fracts */
  s4 = t + (fMultDiv2(s4, C52) << (2));
  s2 = t + fMult(s2, C53);

  /* combination: cross real/imaginary terms produce outputs 1..4 */
  pDat[2] = r1 + s2;
  pDat[8] = r1 - s2;
  pDat[4] = r3 - s4;
  pDat[6] = r3 + s4;

  pDat[3] = s1 - r2;
  pDat[9] = s1 + r2;
  pDat[5] = s3 + r4;
  pDat[7] = s3 - r4;
}
|
|
#define F5C(x) STC(x)
|
|
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
|
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
|
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
|
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
|
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
|
/**
|
* \brief Function performs a complex 10-point FFT
|
* The FFT is performed inplace. The result of the FFT
|
* is scaled by SCALEFACTOR10 bits.
|
*
|
* WOPS FLC version: 1093 cycles
|
* WOPS with 32x16 bit multiplications: 196 cycles
|
*
|
 * \param [i/o] x pointer to 10 interleaved complex values (re/im pairs),
 *                transformed in place; the stride is fixed to 1 inside the
 *                function (the former re/im/s arguments were merged into x)
|
*
|
* \return void
|
*/
|
/* 10-point FFT realized as 2 length-5 Winograd modules followed by 5
   length-2 butterflies (decimation: even inputs feed the first fft5, odd
   inputs the second). Operates in place on x (interleaved re/im), output
   scaled down by SCALEFACTOR10 bits via the input prescale. */
static void fft10(FIXP_DBL *x) // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
{
  FIXP_DBL t;
  FIXP_DBL x0, x1, x2, x3, x4;
  FIXP_DBL r1, r2, r3, r4;
  FIXP_DBL s1, s2, s3, s4;
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;

  const int s = 1; // stride factor

  /* 2 fft5 stages */

  /* real part (first fft5: inputs 0, 2, 4, 6, 8 — even complex samples) */
  x0 = (x[s * 0] >> SCALEFACTOR10);
  x1 = (x[s * 4] >> SCALEFACTOR10);
  x2 = (x[s * 8] >> SCALEFACTOR10);
  x3 = (x[s * 12] >> SCALEFACTOR10);
  x4 = (x[s * 16] >> SCALEFACTOR10);

  r1 = (x3 + x2);
  r4 = (x3 - x2);
  r3 = (x1 + x4);
  r2 = (x1 - x4);
  t = fMult((r1 - r3), C54);
  r1 = (r1 + r3);
  y00 = (x0 + r1);
  /* C55 carries a 0.5 scaling, compensated by '<< 1' */
  r1 = (y00 + ((fMult(r1, C55) << 1)));
  r3 = (r1 - t);
  r1 = (r1 + t);
  t = fMult((r4 + r2), C51);
  r4 = (t + (fMult(r4, C52) << 1));
  r2 = (t + fMult(r2, C53));

  /* imaginary part */
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);

  s1 = (x3 + x2);
  s4 = (x3 - x2);
  s3 = (x1 + x4);
  s2 = (x1 - x4);
  t = fMult((s1 - s3), C54);
  s1 = (s1 + s3);
  y01 = (x0 + s1);
  s1 = (y01 + (fMult(s1, C55) << 1));
  s3 = (s1 - t);
  s1 = (s1 + t);
  t = fMult((s4 + s2), C51);
  s4 = (t + (fMult(s4, C52) << 1));
  s2 = (t + fMult(s2, C53));

  /* combination: outputs of the first fft5 (even branch) */
  y04 = (r1 + s2);
  y16 = (r1 - s2);
  y08 = (r3 - s4);
  y12 = (r3 + s4);

  y05 = (s1 - r2);
  y17 = (s1 + r2);
  y09 = (s3 + r4);
  y13 = (s3 - r4);

  /* real part (second fft5: inputs 5, 1, 3, 7, 9 — odd complex samples) */
  x0 = (x[s * 10] >> SCALEFACTOR10);
  x1 = (x[s * 2] >> SCALEFACTOR10);
  x2 = (x[s * 6] >> SCALEFACTOR10);
  x3 = (x[s * 14] >> SCALEFACTOR10);
  x4 = (x[s * 18] >> SCALEFACTOR10);

  r1 = (x1 + x4);
  r4 = (x1 - x4);
  r3 = (x3 + x2);
  r2 = (x3 - x2);
  t = fMult((r1 - r3), C54);
  r1 = (r1 + r3);
  y02 = (x0 + r1);
  r1 = (y02 + ((fMult(r1, C55) << 1)));
  r3 = (r1 - t);
  r1 = (r1 + t);
  t = fMult(((r4 + r2)), C51);
  r4 = (t + (fMult(r4, C52) << 1));
  r2 = (t + fMult(r2, C53));

  /* imaginary part */
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);

  s1 = (x1 + x4);
  s4 = (x1 - x4);
  s3 = (x3 + x2);
  s2 = (x3 - x2);
  t = fMult((s1 - s3), C54);
  s1 = (s1 + s3);
  y03 = (x0 + s1);
  s1 = (y03 + (fMult(s1, C55) << 1));
  s3 = (s1 - t);
  s1 = (s1 + t);
  t = fMult((s4 + s2), C51);
  s4 = (t + (fMult(s4, C52) << 1));
  s2 = (t + fMult(s2, C53));

  /* combination: outputs of the second fft5 (odd branch) */
  y06 = (r1 + s2);
  y18 = (r1 - s2);
  y10 = (r3 - s4);
  y14 = (r3 + s4);

  y07 = (s1 - r2);
  y19 = (s1 + r2);
  y11 = (s3 + r4);
  y15 = (s3 - r4);

  /* 5 fft2 stages: combine even/odd fft5 outputs and write the final,
     digit-reversed output order back into x */
  x[s * 0] = (y00 + y02);
  x[s * 0 + 1] = (y01 + y03);
  x[s * 10] = (y00 - y02);
  x[s * 10 + 1] = (y01 - y03);

  x[s * 4] = (y04 + y06);
  x[s * 4 + 1] = (y05 + y07);
  x[s * 14] = (y04 - y06);
  x[s * 14 + 1] = (y05 - y07);

  x[s * 8] = (y08 + y10);
  x[s * 8 + 1] = (y09 + y11);
  x[s * 18] = (y08 - y10);
  x[s * 18 + 1] = (y09 - y11);

  x[s * 12] = (y12 + y14);
  x[s * 12 + 1] = (y13 + y15);
  x[s * 2] = (y12 - y14);
  x[s * 2 + 1] = (y13 - y15);

  x[s * 16] = (y16 + y18);
  x[s * 16 + 1] = (y17 + y19);
  x[s * 6] = (y16 - y18);
  x[s * 6 + 1] = (y17 - y19);
}
|
|
#ifndef FUNCTION_fft12
|
#define FUNCTION_fft12
|
|
#undef C31
|
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */
|
|
/* 12-point FFT: four length-3 Winograd modules (three of them twiddled with
   the RotVectorReal12/RotVectorImag12 ROM tables), followed by three inline
   length-4 FFTs. In-place on pInput (interleaved re/im); uses aDst as
   intermediate buffer. */
static inline void fft12(FIXP_DBL *pInput) {
  FIXP_DBL aDst[24];
  FIXP_DBL *pSrc, *pDst;
  int i;

  pSrc = pInput;
  pDst = aDst;
  FIXP_DBL r1, r2, s1, s2, pD;

  /* First 3*2 samples are shifted right by 2 before output */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination (first fft3 needs no twiddle factors) */
  pDst[2] = (r1 - s2) >> 1;
  pDst[3] = (s1 + r2) >> 1;
  pDst[4] = (r1 + s2) >> 1;
  pDst[5] = (s1 - r2) >> 1;
  pSrc += 2;
  pDst += 6;

  /* Twiddle factors for the two middle fft3 modules come from ROM */
  const FIXP_STB *pVecRe = RotVectorReal12;
  const FIXP_STB *pVecIm = RotVectorImag12;
  FIXP_DBL re, im;
  FIXP_STB vre, vim;
  for (i = 0; i < 2; i++) {
    /* sample 0,1 are shifted right by 2 before output */
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
     * output */

    r1 = pSrc[8] + pSrc[16];
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    pD = pSrc[0] >> 1;
    pDst[0] = (pD + (r1 >> 1)) >> 1;
    r1 = pD - (r1 >> 2);

    /* imaginary part */
    s1 = pSrc[9] + pSrc[17];
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    pD = pSrc[1] >> 1;
    pDst[1] = (pD + (s1 >> 1)) >> 1;
    s1 = pD - (s1 >> 2);

    /* combination: apply ROM twiddle factors via complex multiply */
    re = (r1 - s2) >> 0;
    im = (s1 + r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);

    re = (r1 + s2) >> 0;
    im = (s1 - r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);

    pDst += 6;
    pSrc += 2;
  }
  /* sample 0,1 are shifted right by 2 before output */
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination: trivial twiddles (+j) and (-1) applied directly */
  pDst[2] = (s1 + r2) >> 1;
  pDst[3] = (s2 - r1) >> 1;
  pDst[4] = -((r1 + s2) >> 1);
  pDst[5] = (r2 - s1) >> 1;

  /* Perform 3 times the fft of length 4. The input samples are at the address
  of aDst and the output samples are at the address of pInput. The input vector
  for the fft of length 4 is built of the interleaved samples in aDst, the
  output samples are stored consecutively at the address of pInput.
  */
  pSrc = aDst;
  pDst = pInput;
  for (i = 0; i < 3; i++) {
    /* inline FFT4 merged with incoming resorting loop */
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;

    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */

    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */

    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */

    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */

    a10 = a10 - pSrc[18]; /* Re C - Re D */
    a30 = a30 - pSrc[19]; /* Im C - Im D */

    pDst[6] = tmp0 + a30; /* Re B' = Re A - Re B + Im C - Im D */
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
    pDst[7] = tmp1 - a10; /* Im B' = Im A - Im B - Re C + Re D */
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */

    pSrc += 2;
    pDst += 2;
  }
}
|
#endif /* FUNCTION_fft12 */
|
|
#ifndef FUNCTION_fft15
|
|
#define N3 3
|
#define N5 5
|
#define N6 6
|
#define N15 15
|
|
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
|
* length 5. */
|
/* 15-point FFT via prime-factor decomposition into 5 length-3 modules and
   3 length-5 modules (fft5). In-place on pInput (interleaved re/im); uses
   aDst/aDst1 as ping-pong buffers for the index permutations. */
static inline void fft15(FIXP_DBL *pInput) {
  FIXP_DBL aDst[2 * N15];
  FIXP_DBL aDst1[2 * N15];
  int i, k, l;

  /* Sort input vector for fft's of length 3
  input3(0:2) = [input(0) input(5) input(10)];
  input3(3:5) = [input(3) input(8) input(13)];
  input3(6:8) = [input(6) input(11) input(1)];
  input3(9:11) = [input(9) input(14) input(4)];
  input3(12:14) = [input(12) input(2) input(7)]; */
  {
    const FIXP_DBL *pSrc = pInput;
    FIXP_DBL *RESTRICT pDst = aDst;
    /* Merge 3 loops into one, skip call of fft3 */
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
      /* gather one length-3 input group; l walks the permutation above,
         wrapping modulo 2*N15 (complex index arithmetic) */
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);

      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N5) + (2 * N3);
      if (l >= (2 * N15)) l -= (2 * N15);

      /* fft3 merged with shift right by 2 loop */
      FIXP_DBL r1, r2, r3;
      FIXP_DBL s1, s2;
      /* real part */
      r1 = pDst[k + 2] + pDst[k + 4];
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
      s1 = pDst[k + 0];
      pDst[k + 0] = (s1 + r1) >> 2;
      r1 = s1 - (r1 >> 1);

      /* imaginary part */
      s1 = pDst[k + 3] + pDst[k + 5];
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
      r3 = pDst[k + 1];
      pDst[k + 1] = (r3 + s1) >> 2;
      s1 = r3 - (s1 >> 1);

      /* combination */
      pDst[k + 2] = (r1 - s2) >> 2;
      pDst[k + 4] = (r1 + s2) >> 2;
      pDst[k + 3] = (s1 + r2) >> 2;
      pDst[k + 5] = (s1 - r2) >> 2;
    }
  }
  /* Sort input vector for fft's of length 5
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
  /* Merge 2 loops into one, brings about 10% */
  {
    const FIXP_DBL *pSrc = aDst;
    FIXP_DBL *RESTRICT pDst = aDst1;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      l = 2 * i;
      /* gather every N3-rd complex sample, then transform in place */
      pDst[k + 0] = pSrc[l + 0];
      pDst[k + 1] = pSrc[l + 1];
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
      fft5(&pDst[k]);
    }
  }
  /* Sort output vector of length 15
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
  /* optimize clumsy loop, brings about 5% */
  {
    const FIXP_DBL *pSrc = aDst1;
    FIXP_DBL *RESTRICT pDst = pInput;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      /* scatter with stride N6 (mod N15) to realize the output permutation */
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 6] = pSrc[l];
      pDst[k + 7] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 8] = pSrc[l];
      pDst[k + 9] = pSrc[l + 1];
      l += 2; /* no modulo check needed, it cannot occur */
    }
  }
}
|
#endif /* FUNCTION_fft15 */
|
|
/*
|
Select shift placement.
|
Some processors like ARM may shift "for free" in combination with an addition
or subtraction, but others don't, so either combining the shift with +/- or
reducing the total number of shift operations is optimal
|
*/
|
#if !defined(__arm__)
|
#define SHIFT_A >> 1
|
#define SHIFT_B
|
#else
|
#define SHIFT_A
|
#define SHIFT_B >> 1
|
#endif
|
|
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
|
*/
|
|
/* This defines prevents this array to be declared twice, if 16-bit fft is
|
* enabled too */
|
#define FUNCTION_DATA_fft_16_w16
|
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
|
STCP(0x30fbc54d, 0x7641af3d)};
|
|
/* 16-point FFT: one radix-4 stage (4 butterflies, written out straight-line)
   followed by two radix-2/4 combination stages using trivial twiddles
   (1, +/-j, W_PiFOURTH) and the fft16_w16 twiddle pair. In-place on x
   (32 interleaved re/im values). SHIFT_A/SHIFT_B place the mandatory
   halving either on the loads or on the uses, depending on the platform
   (see the comment above their definition). Statement order is
   load/store-exact; do not reorder. */
LNK_SECTION_CODE_L1
inline void fft_16(FIXP_DBL *RESTRICT x) {
  FIXP_DBL vr, ur;
  FIXP_DBL vr2, ur2;
  FIXP_DBL vr3, ur3;
  FIXP_DBL vr4, ur4;
  FIXP_DBL vi, ui;
  FIXP_DBL vi2, ui2;
  FIXP_DBL vi3, ui3;

  /* --- radix-4 stage, butterfly 0 (samples 0, 4, 8, 12) --- */
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */

  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */

  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr -= x[16];              /* Re A - Re B */
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
  ur -= x[17];              /* Im A - Im B */
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */

  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */

  x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */

  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

  /* --- radix-4 stage, butterfly 1 (samples 2, 6, 10, 14) --- */
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
  x[8] = vr2 + (vi2 SHIFT_B);  /* Re A' = ReA + ReB +ReC + ReD */
  x[9] = ur2 + (ui2 SHIFT_B);  /* Im A' = sum of imag values */
  x[12] = vr2 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[13] = ur2 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr2 -= x[20];                /* Re A - Re B */
  ur2 -= x[21];                /* Im A - Im B */
  vi2 = (vi2 SHIFT_B)-x[28];   /* Re C - Re D */
  ui2 = (ui2 SHIFT_B)-x[29];   /* Im C - Im D */

  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */

  x[10] = ui2 + vr2; /* Re B' = Im C - Im D + Re A - Re B */
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */

  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */

  /* --- radix-4 stage, butterfly 2 (samples 1, 5, 9, 13) --- */
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr3 -= x[18];               /* Re A - Re B */
  ur3 -= x[19];               /* Im A - Im B */
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
  x[18] = ui + vr3; /* Re B' = Im C - Im D + Re A - Re B */
  x[19] = ur3 - vi; /* Im B'= -Re C + Re D + Im A - Im B */

  /* --- radix-4 stage, butterfly 3 (samples 3, 7, 11, 15) --- */
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr4 -= x[22];                /* Re A - Re B */
  ur4 -= x[23];                /* Im A - Im B */

  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */

  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
  x[26] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
  x[30] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
  x[27] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
  x[31] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */

  /* --- second stage: radix-2 butterflies with trivial twiddles
         (1, +/-j, W_PiFOURTH via SUMDIFF_PIFOURTH) --- */
  // xt1 = 0
  // xt2 = 8
  vr = x[8];
  vi = x[9];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[8] = ur - (vr >> 1);
  x[9] = ui - (vi >> 1);

  // xt1 = 4
  // xt2 = 12
  vr = x[13];
  vi = x[12];
  ur = x[4] >> 1;
  ui = x[5] >> 1;
  x[4] = ur + (vr >> 1);
  x[5] = ui - (vi >> 1);
  x[12] = ur - (vr >> 1);
  x[13] = ui + (vi >> 1);

  // xt1 = 16
  // xt2 = 24
  vr = x[24];
  vi = x[25];
  ur = x[16] >> 1;
  ui = x[17] >> 1;
  x[16] = ur + (vr >> 1);
  x[17] = ui + (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui - (vi >> 1);

  // xt1 = 20
  // xt2 = 28
  vr = x[29];
  vi = x[28];
  ur = x[20] >> 1;
  ui = x[21] >> 1;
  x[20] = ur + (vr >> 1);
  x[21] = ui - (vi >> 1);
  x[28] = ur - (vr >> 1);
  x[29] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 10
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[10] = (ur >> 1) - vr;
  x[11] = (ui >> 1) - vi;

  // xt1 = 6
  // xt2 = 14
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) - vi;
  x[14] = (ur >> 1) - vr;
  x[15] = (ui >> 1) + vi;

  // xt1 = 18
  // xt2 = 26
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
  ur = x[18];
  ui = x[19];
  x[18] = (ur >> 1) + vr;
  x[19] = (ui >> 1) + vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) - vi;

  // xt1 = 22
  // xt2 = 30
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
  ur = x[22];
  ui = x[23];
  x[22] = (ur >> 1) + vr;
  x[23] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;

  /* --- final stage: radix-2 butterflies across halves, non-trivial
         twiddles via fft16_w16 --- */
  // xt1 = 0
  // xt2 = 16
  vr = x[16];
  vi = x[17];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[16] = ur - (vr >> 1);
  x[17] = ui - (vi >> 1);

  // xt1 = 8
  // xt2 = 24
  vi = x[24];
  vr = x[25];
  ur = x[8] >> 1;
  ui = x[9] >> 1;
  x[8] = ur + (vr >> 1);
  x[9] = ui - (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 18
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[18] = (ur >> 1) - vr;
  x[19] = (ui >> 1) - vi;

  // xt1 = 10
  // xt2 = 26
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
  ur = x[10];
  ui = x[11];
  x[10] = (ur >> 1) + vr;
  x[11] = (ui >> 1) - vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) + vi;

  // xt1 = 4
  // xt2 = 20
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
  ur = x[4];
  ui = x[5];
  x[4] = (ur >> 1) + vr;
  x[5] = (ui >> 1) + vi;
  x[20] = (ur >> 1) - vr;
  x[21] = (ui >> 1) - vi;

  // xt1 = 12
  // xt2 = 28
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
  ur = x[12];
  ui = x[13];
  x[12] = (ur >> 1) + vr;
  x[13] = (ui >> 1) - vi;
  x[28] = (ur >> 1) - vr;
  x[29] = (ui >> 1) + vi;

  // xt1 = 6
  // xt2 = 22
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) + vi;
  x[22] = (ur >> 1) - vr;
  x[23] = (ui >> 1) - vi;

  // xt1 = 14
  // xt2 = 30
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
  ur = x[14];
  ui = x[15];
  x[14] = (ur >> 1) + vr;
  x[15] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;
}
|
#endif /* FUNCTION_fft_16 */
|
|
#ifndef FUNCTION_fft_32
|
static const FIXP_STP fft32_w32[6] = {
|
STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
|
STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
|
STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
|
#define W_PiFOURTH STC(0x5a82799a)
|
|
LNK_SECTION_CODE_L1
/**
 * \brief  Complex 32-point FFT, computed in place.
 *
 * \param _x  Buffer of 32 interleaved complex values (64 FIXP_DBL words,
 *            re/im pairs). Every butterfly halves its operands, so the
 *            output is scaled down relative to the input; the caller
 *            compensates via SCALEFACTOR32 (see fft()).
 *
 * Structure: one fully unrolled block performing the combined first two
 * radix-4 stages, a loop over the four 8-bin groups performing the third
 * (radix-2) stage, then a final unrolled block performing the last two
 * radix-2 stages using the fft32_w32 twiddle factors.
 */
inline void fft_32(FIXP_DBL *const _x) {
  /*
   * 1+2 stage radix 4
   */

  /////////////////////////////////////////////////////////////////////////////////////////
  {
    FIXP_DBL *const x = _x;
    FIXP_DBL vi, ui;
    FIXP_DBL vi2, ui2;
    FIXP_DBL vi3, ui3;
    FIXP_DBL vr, ur;
    FIXP_DBL vr2, ur2;
    FIXP_DBL vr3, ur3;
    FIXP_DBL vr4, ur4;

    /* Radix-4 butterflies over inputs A/B/C/D taken at stride 16 complex
       bins; the four groups (i = 0, 16, 32, 48) are interleaved so loads
       for the next group overlap the stores of the current one. */
    // i = 0
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */

    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */

    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */

    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr -= x[32]; /* Re A - Re B */
    ur -= x[33]; /* Im A - Im B */
    vi = (vi SHIFT_B) - x[48]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[49]; /* Im C - Im D */

    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */

    x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */

    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

    // i=16
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */

    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr2 -= x[36]; /* Re A - Re B */
    ur2 -= x[37]; /* Im A - Im B */
    vi = (vi SHIFT_B) - x[52]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[53]; /* Im C - Im D */

    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */

    x[18] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */

    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */

    // i = 32

    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr3 -= x[34]; /* Re A - Re B */
    ur3 -= x[35]; /* Im A - Im B */
    vi2 = (vi2 SHIFT_B) - x[50]; /* Re C - Re D */
    ui2 = (ui2 SHIFT_B) - x[51]; /* Im C - Im D */

    x[34] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

    // i=48

    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr4 -= x[38]; /* Re A - Re B */
    ur4 -= x[39]; /* Im A - Im B */

    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */

    vi3 = (vi3 SHIFT_B) - x[54]; /* Re C - Re D */
    ui3 = (ui3 SHIFT_B) - x[55]; /* Im C - Im D */

    x[50] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */

    /* Same radix-4 pass for the odd-offset groups i = 8, 24, 40, 56. */
    // i=8
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */

    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */

    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */

    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr -= x[40]; /* Re A - Re B */
    ur -= x[41]; /* Im A - Im B */
    vi = (vi SHIFT_B) - x[56]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[57]; /* Im C - Im D */

    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */

    x[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */

    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

    // i=24
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */

    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr2 -= x[44]; /* Re A - Re B */
    ur2 -= x[45]; /* Im A - Im B */
    vi = (vi SHIFT_B) - x[60]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[61]; /* Im C - Im D */

    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */

    x[26] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */

    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */

    // i=40

    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr3 -= x[42]; /* Re A - Re B */
    ur3 -= x[43]; /* Im A - Im B */
    vi2 = (vi2 SHIFT_B) - x[58]; /* Re C - Re D */
    ui2 = (ui2 SHIFT_B) - x[59]; /* Im C - Im D */

    x[42] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

    // i=56

    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr4 -= x[46]; /* Re A - Re B */
    ur4 -= x[47]; /* Im A - Im B */

    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */

    vi3 = (vi3 SHIFT_B) - x[62]; /* Re C - Re D */
    ui3 = (ui3 SHIFT_B) - x[63]; /* Im C - Im D */

    x[58] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
  }

  /* Third stage: radix-2 butterflies at stride 4 complex bins within each
     group of 8, applied to all four groups. Twiddles are 1, -j and the
     pi/4 factors handled by SUMDIFF_PIFOURTH. */
  {
    FIXP_DBL *xt = _x;

    int j = 4;
    do {
      FIXP_DBL vi, ui, vr, ur;

      vr = xt[8];
      vi = xt[9];
      ur = xt[0] >> 1;
      ui = xt[1] >> 1;
      xt[0] = ur + (vr >> 1);
      xt[1] = ui + (vi >> 1);
      xt[8] = ur - (vr >> 1);
      xt[9] = ui - (vi >> 1);

      vr = xt[13];
      vi = xt[12];
      ur = xt[4] >> 1;
      ui = xt[5] >> 1;
      xt[4] = ur + (vr >> 1);
      xt[5] = ui - (vi >> 1);
      xt[12] = ur - (vr >> 1);
      xt[13] = ui + (vi >> 1);

      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
      ur = xt[2];
      ui = xt[3];
      xt[2] = (ur >> 1) + vr;
      xt[3] = (ui >> 1) + vi;
      xt[10] = (ur >> 1) - vr;
      xt[11] = (ui >> 1) - vi;

      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
      ur = xt[6];
      ui = xt[7];

      xt[6] = (ur >> 1) + vr;
      xt[7] = (ui >> 1) - vi;
      xt[14] = (ur >> 1) - vr;
      xt[15] = (ui >> 1) + vi;
      xt += 16;
    } while (--j != 0);
  }

  /* Last two stages: radix-2 butterflies at strides 8 and 16 complex bins,
     with non-trivial twiddles taken from fft32_w32. */
  {
    FIXP_DBL *const x = _x;
    FIXP_DBL vi, ui, vr, ur;

    vr = x[16];
    vi = x[17];
    ur = x[0] >> 1;
    ui = x[1] >> 1;
    x[0] = ur + (vr >> 1);
    x[1] = ui + (vi >> 1);
    x[16] = ur - (vr >> 1);
    x[17] = ui - (vi >> 1);

    vi = x[24];
    vr = x[25];
    ur = x[8] >> 1;
    ui = x[9] >> 1;
    x[8] = ur + (vr >> 1);
    x[9] = ui - (vi >> 1);
    x[24] = ur - (vr >> 1);
    x[25] = ui + (vi >> 1);

    vr = x[48];
    vi = x[49];
    ur = x[32] >> 1;
    ui = x[33] >> 1;
    x[32] = ur + (vr >> 1);
    x[33] = ui + (vi >> 1);
    x[48] = ur - (vr >> 1);
    x[49] = ui - (vi >> 1);

    vi = x[56];
    vr = x[57];
    ur = x[40] >> 1;
    ui = x[41] >> 1;
    x[40] = ur + (vr >> 1);
    x[41] = ui - (vi >> 1);
    x[56] = ur - (vr >> 1);
    x[57] = ui + (vi >> 1);

    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
    ur = x[2];
    ui = x[3];
    x[2] = (ur >> 1) + vr;
    x[3] = (ui >> 1) + vi;
    x[18] = (ur >> 1) - vr;
    x[19] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
    ur = x[10];
    ui = x[11];
    x[10] = (ur >> 1) + vr;
    x[11] = (ui >> 1) - vi;
    x[26] = (ur >> 1) - vr;
    x[27] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
    ur = x[34];
    ui = x[35];
    x[34] = (ur >> 1) + vr;
    x[35] = (ui >> 1) + vi;
    x[50] = (ur >> 1) - vr;
    x[51] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
    ur = x[42];
    ui = x[43];
    x[42] = (ur >> 1) + vr;
    x[43] = (ui >> 1) - vi;
    x[58] = (ur >> 1) - vr;
    x[59] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
    ur = x[4];
    ui = x[5];
    x[4] = (ur >> 1) + vr;
    x[5] = (ui >> 1) + vi;
    x[20] = (ur >> 1) - vr;
    x[21] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
    ur = x[12];
    ui = x[13];
    x[12] = (ur >> 1) + vr;
    x[13] = (ui >> 1) - vi;
    x[28] = (ur >> 1) - vr;
    x[29] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
    ur = x[36];
    ui = x[37];
    x[36] = (ur >> 1) + vr;
    x[37] = (ui >> 1) + vi;
    x[52] = (ur >> 1) - vr;
    x[53] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
    ur = x[44];
    ui = x[45];
    x[44] = (ur >> 1) + vr;
    x[45] = (ui >> 1) - vi;
    x[60] = (ur >> 1) - vr;
    x[61] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[22] = (ur >> 1) - vr;
    x[23] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) - vi;
    x[30] = (ur >> 1) - vr;
    x[31] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
    ur = x[38];
    ui = x[39];
    x[38] = (ur >> 1) + vr;
    x[39] = (ui >> 1) + vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
    ur = x[46];
    ui = x[47];

    x[46] = (ur >> 1) + vr;
    x[47] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;

    vr = x[32];
    vi = x[33];
    ur = x[0] >> 1;
    ui = x[1] >> 1;
    x[0] = ur + (vr >> 1);
    x[1] = ui + (vi >> 1);
    x[32] = ur - (vr >> 1);
    x[33] = ui - (vi >> 1);

    vi = x[48];
    vr = x[49];
    ur = x[16] >> 1;
    ui = x[17] >> 1;
    x[16] = ur + (vr >> 1);
    x[17] = ui - (vi >> 1);
    x[48] = ur - (vr >> 1);
    x[49] = ui + (vi >> 1);

    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
    ur = x[2];
    ui = x[3];
    x[2] = (ur >> 1) + vr;
    x[3] = (ui >> 1) + vi;
    x[34] = (ur >> 1) - vr;
    x[35] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
    ur = x[18];
    ui = x[19];
    x[18] = (ur >> 1) + vr;
    x[19] = (ui >> 1) - vi;
    x[50] = (ur >> 1) - vr;
    x[51] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
    ur = x[4];
    ui = x[5];
    x[4] = (ur >> 1) + vr;
    x[5] = (ui >> 1) + vi;
    x[36] = (ur >> 1) - vr;
    x[37] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
    ur = x[20];
    ui = x[21];
    x[20] = (ur >> 1) + vr;
    x[21] = (ui >> 1) - vi;
    x[52] = (ur >> 1) - vr;
    x[53] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[38] = (ur >> 1) - vr;
    x[39] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
    ur = x[22];
    ui = x[23];
    x[22] = (ur >> 1) + vr;
    x[23] = (ui >> 1) - vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
    ur = x[8];
    ui = x[9];
    x[8] = (ur >> 1) + vr;
    x[9] = (ui >> 1) + vi;
    x[40] = (ur >> 1) - vr;
    x[41] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
    ur = x[24];
    ui = x[25];
    x[24] = (ur >> 1) + vr;
    x[25] = (ui >> 1) - vi;
    x[56] = (ur >> 1) - vr;
    x[57] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
    ur = x[10];
    ui = x[11];

    x[10] = (ur >> 1) + vr;
    x[11] = (ui >> 1) + vi;
    x[42] = (ur >> 1) - vr;
    x[43] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
    ur = x[26];
    ui = x[27];
    x[26] = (ur >> 1) + vr;
    x[27] = (ui >> 1) - vi;
    x[58] = (ur >> 1) - vr;
    x[59] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
    ur = x[12];
    ui = x[13];
    x[12] = (ur >> 1) + vr;
    x[13] = (ui >> 1) + vi;
    x[44] = (ur >> 1) - vr;
    x[45] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
    ur = x[28];
    ui = x[29];
    x[28] = (ur >> 1) + vr;
    x[29] = (ui >> 1) - vi;
    x[60] = (ur >> 1) - vr;
    x[61] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) + vi;
    x[46] = (ur >> 1) - vr;
    x[47] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
    ur = x[30];
    ui = x[31];
    x[30] = (ur >> 1) + vr;
    x[31] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;
  }
}
|
#endif /* #ifndef FUNCTION_fft_32 */
|
|
/**
 * \brief Apply rotation vectors to a data buffer.
 * \param pData buffer of interleaved complex data, modified in place.
 * \param cl length of each row of input data.
 * \param l total length of input data.
 * \param pVecRe real part of rotation coefficient vector.
 * \param pVecIm imaginary part of rotation coefficient vector.
 */
|
|
/*
  If enabled, this define patches each inaccurate 0x7FFF coefficient
  (i.e. 0.9999...) and uses 0x8000 (-1.0) instead. The sign of the
  result is inverted afterwards to compensate.
*/
|
#define noFFT_APPLY_ROT_VECTOR_HQ
|
|
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
|
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
                                        const int l, const FIXP_STB *pVecRe,
                                        const FIXP_STB *pVecIm) {
  int idx;

  /* First row: no rotation needed, just scale each complex value by 0.25. */
  for (idx = 0; idx < cl; idx++) {
    pData[2 * idx] >>= 2;
    pData[2 * idx + 1] >>= 2;
  }

  /* Remaining rows: scale the first column by 0.25, then rotate the other
     cl-1 columns by the next coefficients while scaling by 0.25 overall
     (one explicit halving here, one inside cplxMultDiv2). */
  for (; idx < l; idx += cl) {
    FIXP_DBL *row = &pData[2 * idx];

    row[0] >>= 2;
    row[1] >>= 2;

    for (int k = 1; k < cl; k++) {
      const FIXP_DBL reHalf = row[2 * k] >> 1;
      const FIXP_DBL imHalf = row[2 * k + 1] >> 1;
      const FIXP_STB rotRe = *pVecRe++;
      const FIXP_STB rotIm = *pVecIm++;

      cplxMultDiv2(&row[2 * k + 1], &row[2 * k], imHalf, reHalf, rotRe, rotIm);
    }
  }
}
|
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
|
|
/* Select either switch-case or function-pointer dispatch. */
|
//#define FFT_TWO_STAGE_SWITCH_CASE
|
#ifndef FUNCTION_fftN2_func
/**
 * \brief  Two-stage (Cooley-Tukey style) complex FFT of length dim1 * dim2.
 *
 * \param pInput        interleaved complex input/output buffer (in place).
 * \param length        total transform length; must equal dim1 * dim2.
 * \param dim1          length of the first-stage sub-FFTs.
 * \param dim2          length of the second-stage sub-FFTs.
 * \param fft1          function performing an FFT of length dim1.
 * \param fft2          function performing an FFT of length dim2.
 * \param RotVectorReal real parts of the inter-stage rotation coefficients.
 * \param RotVectorImag imaginary parts of the rotation coefficients.
 * \param aDst          scratch buffer of 2 * length values.
 * \param aDst2         scratch buffer of 2 * dim2 values.
 */
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
                              const int dim1, const int dim2,
                              void (*const fft1)(FIXP_DBL *),
                              void (*const fft2)(FIXP_DBL *),
                              const FIXP_STB *RotVectorReal,
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
                              FIXP_DBL *aDst2) {
  /* The real part of the input samples are at the addresses with even indices
  and the imaginary part of the input samples are at the addresses with odd
  indices. The output samples are stored at the address of pInput
  */
  FIXP_DBL *pSrc, *pDst, *pDstOut;
  int i;

  FDK_ASSERT(length == dim1 * dim2);

  /* Perform dim2 times the fft of length dim1. The input samples are at the
  address of pSrc and the output samples are at the address of pDst. The input
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
  the output samples are stored consecutively.
  */
  pSrc = pInput;
  pDst = aDst;
  for (i = 0; i < dim2; i++) {
    /* De-interleave: gather column i (stride dim2) into a contiguous row. */
    for (int j = 0; j < dim1; j++) {
      pDst[2 * j] = pSrc[2 * j * dim2];
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
    }

    /* fft of size dim1 */
#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft1(pDst);
#else
    /* NOTE(review): in this (disabled by default) branch, 'fft2' in case 2
       resolves to the function-pointer parameter that shadows the global
       fft2() - verify the parameter passed for dim1 == 2 matches. */
    switch (dim1) {
      case 2:
        fft2(pDst);
        break;
      case 3:
        fft3(pDst);
        break;
      case 4:
        fft_4(pDst);
        break;
      /* case 5: fft5(pDst); break; */
      /* case 8: fft_8(pDst); break; */
      case 12:
        fft12(pDst);
        break;
      /* case 15: fft15(pDst); break; */
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
        /*case 64: fft_64(pDst); break;*/
        /* case 128: fft_128(pDst); break; */
    }
#endif
    pSrc += 2;
    pDst = pDst + 2 * dim1;
  }

  /* Perform the modulation of the output of the fft of length dim1 */
  pSrc = aDst;
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);

  /* Perform dim1 times the fft of length dim2. The input samples are at the
  address of aDst and the output samples are at the address of pInput. The input
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
  the output samples are stored consecutively at the address of pInput.
  */
  pSrc = aDst;
  pDst = aDst2;
  pDstOut = pInput;
  for (i = 0; i < dim1; i++) {
    /* Gather row i (stride dim1) into the small contiguous scratch buffer. */
    for (int j = 0; j < dim2; j++) {
      pDst[2 * j] = pSrc[2 * j * dim1];
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
    }

#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft2(pDst);
#else
    switch (dim2) {
      case 4:
        fft_4(pDst);
        break;
      case 9:
        fft9(pDst);
        break;
      case 12:
        fft12(pDst);
        break;
      case 15:
        fft15(pDst);
        break;
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
    }
#endif

    /* Scatter the transformed row back with stride dim1 (transposition). */
    for (int j = 0; j < dim2; j++) {
      pDstOut[2 * j * dim1] = pDst[2 * j];
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
    }
    pSrc += 2;
    pDstOut += 2;
  }
}
#endif /* FUNCTION_fftN2_function */
|
|
/* Convenience wrapper around fftN2_func(): allocates the two scratch buffers
   (aDst: 2 * length values, aDst2: 2 * dim2 values) on the scratch stack,
   runs the two-stage FFT, and releases the scratch memory again. */
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
              RotVectorReal, RotVectorImag)                                \
  {                                                                        \
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
  }
|
|
/*!
 *
 * \brief complex FFT of length 6, 12, 20, 24, 48, 60, 80, 96, 120, 192,
 *        240, 384, 480 (composed of two shorter sub-FFTs via fftN2)
 * \param pInput contains the input signal prescaled right by 2
 *        pInput contains the output signal scaled by SCALEFACTOR<#length>
 *        The output signal does not have any fixed headroom
 * \return void
 *
 */
|
|
#ifndef FUNCTION_fft6
/* Complex FFT of length 6 = 2 * 3. */
static inline void fft6(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
}
#endif /* #ifndef FUNCTION_fft6 */
|
|
#ifndef FUNCTION_fft12
/* Complex FFT of length 12 = 3 * 4. */
static inline void fft12(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
        RotVectorImag12); /* 16,58 */
}
#endif /* #ifndef FUNCTION_fft12 */
|
|
#ifndef FUNCTION_fft20
/* Complex FFT of length 20 = 4 * 5. */
static inline void fft20(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
        RotVectorImag20);
}
#endif /* FUNCTION_fft20 */
|
|
/* Complex FFT of length 24 = 2 * 12. */
static inline void fft24(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
        RotVectorImag24); /* 16,73 */
}
|
|
/* Complex FFT of length 48 = 4 * 12. */
static inline void fft48(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
        RotVectorImag48); /* 16,32 */
}
|
|
#ifndef FUNCTION_fft60
/* Complex FFT of length 60 = 4 * 15. */
static inline void fft60(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
        RotVectorImag60); /* 15,51 */
}
#endif /* FUNCTION_fft60 */
|
|
#ifndef FUNCTION_fft80
/* Complex FFT of length 80 = 5 * 16. */
static inline void fft80(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
        RotVectorImag80); /* */
}
#endif
|
|
#ifndef FUNCTION_fft96
/* Complex FFT of length 96 = 3 * 32. */
static inline void fft96(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
        RotVectorImag96); /* 15,47 */
}
#endif /* FUNCTION_fft96*/
|
|
#ifndef FUNCTION_fft120
/* Complex FFT of length 120 = 8 * 15. */
static inline void fft120(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
        RotVectorImag120);
}
#endif /* FUNCTION_fft120 */
|
|
#ifndef FUNCTION_fft192
/* Complex FFT of length 192 = 16 * 12. */
static inline void fft192(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
        RotVectorImag192); /* 15,50 */
}
#endif
|
|
#ifndef FUNCTION_fft240
/* Complex FFT of length 240 = 16 * 15. */
static inline void fft240(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
        RotVectorImag240); /* 15.44 */
}
#endif
|
|
#ifndef FUNCTION_fft384
/* Complex FFT of length 384 = 12 * 32. */
static inline void fft384(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
        RotVectorImag384); /* 16.02 */
}
#endif /* FUNCTION_fft384 */
|
|
#ifndef FUNCTION_fft480
/* Complex FFT of length 480 = 32 * 15. */
static inline void fft480(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
        RotVectorImag480); /* 15.84 */
}
#endif /* FUNCTION_fft480 */
|
|
/**
 * \brief  Dispatch an in-place complex FFT of the given length to the
 *         matching kernel and accumulate the kernel's scaling.
 *
 * \param length        transform length; unsupported lengths trigger
 *                      FDK_ASSERT(0).
 * \param pInput        interleaved complex input/output buffer, at least
 *                      8-byte aligned.
 * \param pScalefactor  in/out: incremented by SCALEFACTOR<length> to
 *                      account for the right-shifts applied internally.
 */
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
  C_ALLOC_ALIGNED_CHECK(pInput);

  /* Length 32 is checked first, outside the switch, as a fast path. */
  if (length == 32) {
    fft_32(pInput);
    *pScalefactor += SCALEFACTOR32;
  } else {
    switch (length) {
      case 16:
        fft_16(pInput);
        *pScalefactor += SCALEFACTOR16;
        break;
      case 8:
        fft_8(pInput);
        *pScalefactor += SCALEFACTOR8;
        break;
      case 2:
        fft2(pInput);
        *pScalefactor += SCALEFACTOR2;
        break;
      case 3:
        fft3(pInput);
        *pScalefactor += SCALEFACTOR3;
        break;
      case 4:
        fft_4(pInput);
        *pScalefactor += SCALEFACTOR4;
        break;
      case 5:
        fft5(pInput);
        *pScalefactor += SCALEFACTOR5;
        break;
      case 6:
        fft6(pInput);
        *pScalefactor += SCALEFACTOR6;
        break;
      case 10:
        fft10(pInput);
        *pScalefactor += SCALEFACTOR10;
        break;
      case 12:
        fft12(pInput);
        *pScalefactor += SCALEFACTOR12;
        break;
      case 15:
        fft15(pInput);
        *pScalefactor += SCALEFACTOR15;
        break;
      case 20:
        fft20(pInput);
        *pScalefactor += SCALEFACTOR20;
        break;
      case 24:
        fft24(pInput);
        *pScalefactor += SCALEFACTOR24;
        break;
      case 48:
        fft48(pInput);
        *pScalefactor += SCALEFACTOR48;
        break;
      case 60:
        fft60(pInput);
        *pScalefactor += SCALEFACTOR60;
        break;
      /* Power-of-two lengths 64..512 use the generic decimation-in-time
         routine with the shared 512-entry sine table. */
      case 64:
        dit_fft(pInput, 6, SineTable512, 512);
        *pScalefactor += SCALEFACTOR64;
        break;
      case 80:
        fft80(pInput);
        *pScalefactor += SCALEFACTOR80;
        break;
      case 96:
        fft96(pInput);
        *pScalefactor += SCALEFACTOR96;
        break;
      case 120:
        fft120(pInput);
        *pScalefactor += SCALEFACTOR120;
        break;
      case 128:
        dit_fft(pInput, 7, SineTable512, 512);
        *pScalefactor += SCALEFACTOR128;
        break;
      case 192:
        fft192(pInput);
        *pScalefactor += SCALEFACTOR192;
        break;
      case 240:
        fft240(pInput);
        *pScalefactor += SCALEFACTOR240;
        break;
      case 256:
        dit_fft(pInput, 8, SineTable512, 512);
        *pScalefactor += SCALEFACTOR256;
        break;
      case 384:
        fft384(pInput);
        *pScalefactor += SCALEFACTOR384;
        break;
      case 480:
        fft480(pInput);
        *pScalefactor += SCALEFACTOR480;
        break;
      case 512:
        dit_fft(pInput, 9, SineTable512, 512);
        *pScalefactor += SCALEFACTOR512;
        break;
      default:
        FDK_ASSERT(0); /* FFT length not supported! */
        break;
    }
  }
}
|
|
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
|
switch (length) {
|
default:
|
FDK_ASSERT(0); /* IFFT length not supported! */
|
break;
|
}
|
}
|