Skip to content

Commit a7da702

Browse files
authored
[libc][math] Add small code size options for atan2f. (llvm#118532)
1 parent 7efd613 commit a7da702

File tree

3 files changed

+76
-15
lines changed

3 files changed

+76
-15
lines changed

libc/src/__support/macros/optimization.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
4848

4949
#ifndef LIBC_MATH
5050
#define LIBC_MATH 0
51-
#else
51+
#endif // LIBC_MATH
5252

5353
#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
5454
#define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
5858
#define LIBC_MATH_HAS_SMALL_TABLES
5959
#endif
6060

61-
#endif // LIBC_MATH
62-
6361
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H

libc/src/math/generic/atan2f.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {
2121

2222
namespace {
2323

24+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
25+
2426
// Look up tables for accurate pass:
2527

2628
// atan(i/16) with i = 0..16, generated by Sollya with:
@@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
163165
return static_cast<float>(cpp::bit_cast<double>(rr_bits));
164166
}
165167

168+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
169+
166170
} // anonymous namespace
167171

168172
// There are several range reduction steps we can take for atan2(y, x) as
@@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
283287
fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
284288
double q_d = num_d / den_d;
285289

286-
double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
290+
double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
287291
int idx = static_cast<int>(k_d);
292+
double r;
293+
294+
#ifdef LIBC_MATH_HAS_SMALL_TABLES
295+
double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
296+
r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
297+
#else
288298
q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);
289299

290300
double p = atan_eval(q_d, idx);
291-
double r = final_sign *
292-
fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
301+
r = final_sign *
302+
fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
303+
#endif // LIBC_MATH_HAS_SMALL_TABLES
293304

305+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
306+
return static_cast<float>(r);
307+
#else
294308
constexpr uint32_t LOWER_ERR = 4;
295309
// Mask sticky bits in double precision before rounding to single precision.
296310
constexpr uint32_t MASK =
@@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
306320

307321
return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
308322
const_term);
323+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
309324
}
310325

311326
} // namespace LIBC_NAMESPACE_DECL

libc/src/math/generic/inv_trigf_utils.h

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,35 @@
1717
namespace LIBC_NAMESPACE_DECL {
1818

1919
// PI and PI / 2
20-
constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
21-
constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
20+
static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
21+
static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
2222

2323
extern double ATAN_COEFFS[17][9];
2424

25+
// Look-up table for atan(k/16) with k = 0..16.
26+
static constexpr double ATAN_K_OVER_16[17] = {
27+
0.0,
28+
0x1.ff55bb72cfdeap-5,
29+
0x1.fd5ba9aac2f6ep-4,
30+
0x1.7b97b4bce5b02p-3,
31+
0x1.f5b75f92c80ddp-3,
32+
0x1.362773707ebccp-2,
33+
0x1.6f61941e4def1p-2,
34+
0x1.a64eec3cc23fdp-2,
35+
0x1.dac670561bb4fp-2,
36+
0x1.0657e94db30dp-1,
37+
0x1.1e00babdefeb4p-1,
38+
0x1.345f01cce37bbp-1,
39+
0x1.4978fa3269ee1p-1,
40+
0x1.5d58987169b18p-1,
41+
0x1.700a7c5784634p-1,
42+
0x1.819d0b7158a4dp-1,
43+
0x1.921fb54442d18p-1,
44+
};
45+
2546
// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
2647
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
27-
LIBC_INLINE double atan_eval(double x, int i) {
48+
LIBC_INLINE static double atan_eval(double x, unsigned i) {
2849
double x2 = x * x;
2950

3051
double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
@@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
3960
return p;
4061
}
4162

63+
// Evaluate atan without big lookup table.
64+
// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
65+
// = atan((n - d * k/16)) / (d + n * k/16))
66+
// So we let q = (n - d * k/16) / (d + n * k/16),
67+
// and approximate with Taylor polynomial:
68+
// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
69+
LIBC_INLINE static double atan_eval_no_table(double num, double den,
70+
double k_over_16) {
71+
double num_r = fputil::multiply_add(den, -k_over_16, num);
72+
double den_r = fputil::multiply_add(num, k_over_16, den);
73+
double q = num_r / den_r;
74+
75+
constexpr double ATAN_TAYLOR[] = {
76+
-0x1.5555555555555p-2,
77+
0x1.999999999999ap-3,
78+
-0x1.2492492492492p-3,
79+
0x1.c71c71c71c71cp-4,
80+
};
81+
double q2 = q * q;
82+
double q3 = q2 * q;
83+
double q4 = q2 * q2;
84+
double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
85+
double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
86+
double d = fputil::multiply_add(q4, c1, c0);
87+
return fputil::multiply_add(q3, d, q);
88+
}
89+
4290
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
4391
// [|1, D...|], [0, 0.5]);
44-
constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
45-
0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
46-
0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
47-
0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
48-
-0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
92+
static constexpr double ASIN_COEFFS[10] = {
93+
0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
94+
0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
95+
0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
96+
0x1.02311ecf99c28p-5};
4997

5098
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
51-
LIBC_INLINE double asin_eval(double xsq) {
99+
LIBC_INLINE static double asin_eval(double xsq) {
52100
double x4 = xsq * xsq;
53101
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
54102
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);

0 commit comments

Comments
 (0)