/*
|
* rredf.h - trigonometric range reduction function written new for RVCT 4.1
|
*
|
* Copyright (c) 2009-2018, Arm Limited.
|
* SPDX-License-Identifier: MIT
|
*/
|
|
/*
|
* This header file defines an inline function which all three of
|
* the single-precision trig functions (sinf, cosf, tanf) should use
|
* to perform range reduction. The inline function handles the
|
* quickest and most common cases inline, before handing off to an
|
* out-of-line function defined in rredf.c for everything else. Thus
|
* a reasonable compromise is struck between speed and space. (I
|
* hope.) In particular, this approach avoids a function call
|
* overhead in the common case.
|
*/
|
|
#ifndef _included_rredf_h
|
#define _included_rredf_h
|
|
#include "math_private.h"
|
|
#ifdef __cplusplus
|
extern "C" {
|
#endif /* __cplusplus */
|
|
extern float __mathlib_rredf2(float x, int *q, unsigned k);
|
|
/*
|
* Semantics of the function:
|
* - x is the single-precision input value provided by the user
|
* - the return value is in the range [-pi/4,pi/4], and is equal
|
* (within reasonable accuracy bounds) to x minus n*pi/2 for some
|
* integer n. (FIXME: perhaps some slippage on the output
|
* interval is acceptable, requiring more range from the
|
* following polynomial approximations but permitting more
|
* approximate rred decisions?)
|
* - *q is set to a positive value whose low two bits match those
|
* of n. Alternatively, it comes back as -1 indicating that the
|
* input value was trivial in some way (infinity, NaN, or so
|
* small that we can safely return sin(x)=tan(x)=x,cos(x)=1).
|
*/
|
static __inline float __mathlib_rredf(float x, int *q)
|
{
|
/*
|
* First, extract the bit pattern of x as an integer, so that we
|
* can repeatedly compare things to it without multiple
|
* overheads in retrieving comparison results from the VFP.
|
*/
|
unsigned k = fai(x);
|
|
/*
|
* Deal immediately with the simplest possible case, in which x
|
* is already within the interval [-pi/4,pi/4]. This also
|
* identifies the subcase of ludicrously small x.
|
*/
|
if ((k << 1) < (0x3f490fdb << 1)) {
|
if ((k << 1) < (0x39800000 << 1))
|
*q = -1;
|
else
|
*q = 0;
|
return x;
|
}
|
|
/*
|
* The next plan is to multiply x by 2/pi and convert to an
|
* integer, which gives us n; then we subtract n*pi/2 from x to
|
* get our output value.
|
*
|
* By representing pi/2 in that final step by a prec-and-a-half
|
* approximation, we can arrange good accuracy for n strictly
|
* less than 2^13 (so that an FP representation of n has twelve
|
* zero bits at the bottom). So our threshold for this strategy
|
* is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or
|
* 4095.875*pi. (Or, for those perverse people interested in
|
* actual numbers rather than multiples of pi/2, about 12867.5.)
|
*/
|
if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) {
|
float nf = 0.636619772367581343f * x;
|
/*
|
* The difference between that single-precision constant and
|
* the real 2/pi is about 2.568e-8. Hence the product nf has a
|
* potential error of 2.568e-8|x| even before rounding; since
|
* |x| < 4096 pi, that gives us an error bound of about
|
* 0.0003305.
|
*
|
* nf is then rounded to single precision, with a max error of
|
* 1/2 ULP, and since nf goes up to just under 8192, half a
|
* ULP could be as big as 2^-12 ~= 0.0002441.
|
*
|
* So by the time we convert nf to an integer, it could be off
|
* by that much, causing the wrong integer to be selected, and
|
* causing us to return a value a little bit outside the
|
* theoretical [-pi/4,+pi/4] output interval.
|
*
|
* How much outside? Well, we subtract nf*pi/2 from x, so the
|
* error bounds above have be be multiplied by pi/2. And if
|
* both of the above sources of error suffer their worst cases
|
* at once, then the very largest value we could return is
|
* obtained by adding that lot to the interval bound pi/4 to
|
* get
|
*
|
* pi/4 + ((2/pi - 0f_3f22f983)*4096*pi + 2^-12) * pi/2
|
*
|
* which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.)
|
*
|
* So callers of this range reducer should be prepared to
|
* handle numbers up to that large.
|
*/
|
#ifdef __TARGET_FPU_SOFTVFP
|
nf = _frnd(nf);
|
#else
|
if (k & 0x80000000)
|
nf = (nf - 8388608.0f) + 8388608.0f;
|
else
|
nf = (nf + 8388608.0f) - 8388608.0f; /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */
|
#endif
|
*q = 3 & (int)nf;
|
#if 0
|
/*
|
* FIXME: now I need a bunch of special cases to avoid
|
* having to do the full four-word reduction every time.
|
* Also, adjust the comment at the top of this section!
|
*/
|
if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1))
|
return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F;
|
else
|
#endif
|
return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F;
|
}
|
|
/*
|
* That's enough to do in-line; if we're still playing, hand off
|
* to the out-of-line main range reducer.
|
*/
|
return __mathlib_rredf2(x, q, k);
|
}
|
|
#ifdef __cplusplus
|
} /* end of extern "C" */
|
#endif /* __cplusplus */
|
|
#endif /* included */
|
|
/* end of rredf.h */
|