Skip to content

Commit cb1a5a7

Browse files
[SimplifyLibCalls] powf(x, sitofp(n)) -> powi(x, n)
Summary: Partially solves https://bugs.llvm.org/show_bug.cgi?id=42190
Reviewers: spatel, nikic, efriedma
Reviewed By: efriedma
Subscribers: efriedma, nikic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63038
llvm-svn: 364940
1 parent 893bbc9 commit cb1a5a7

File tree

3 files changed

+390
-88
lines changed

3 files changed

+390
-88
lines changed

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,12 +1322,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
13221322
APFloat BaseR = APFloat(1.0);
13231323
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
13241324
BaseR = BaseR / *BaseF;
1325-
bool IsInteger = BaseF->isInteger(),
1326-
IsReciprocal = BaseR.isInteger();
1325+
bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
13271326
const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
13281327
APSInt NI(64, false);
13291328
if ((IsInteger || IsReciprocal) &&
1330-
!NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
1329+
NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
1330+
APFloat::opOK &&
13311331
NI > 1 && NI.isPowerOf2()) {
13321332
double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
13331333
Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1410,12 +1410,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
14101410
return Sqrt;
14111411
}
14121412

1413+
/// Emit a call to the llvm.powi intrinsic computing \p Base raised to \p Expo.
///
/// The intrinsic declaration is looked up (or created) in \p M, overloaded on
/// the type of \p Base, and the call is inserted through \p B.
///
/// \param Base  value to be raised to a power; its type selects the overload.
/// \param Expo  exponent operand, passed through unchanged (the callers
///              visible in this file pass an i32 value).
/// \param M     module that holds the intrinsic declaration.
/// \param B     builder used to create the call instruction.
/// \returns the value produced by B.CreateCall for the powi call.
static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
                                           IRBuilder<> &B) {
  Function *PowiDecl =
      Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
  return B.CreateCall(PowiDecl, {Base, Expo});
}
1419+
14131420
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
1414-
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
1421+
Value *Base = Pow->getArgOperand(0);
1422+
Value *Expo = Pow->getArgOperand(1);
14151423
Function *Callee = Pow->getCalledFunction();
14161424
StringRef Name = Callee->getName();
14171425
Type *Ty = Pow->getType();
1426+
Module *M = Pow->getModule();
14181427
Value *Shrunk = nullptr;
1428+
bool AllowApprox = Pow->hasApproxFunc();
14191429
bool Ignored;
14201430

14211431
// Bail out if simplifying libcalls to pow() is disabled.
@@ -1428,8 +1438,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
14281438

14291439
// Shrink pow() to powf() if the arguments are single precision,
14301440
// unless the result is expected to be double precision.
1431-
if (UnsafeFPShrink &&
1432-
Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
1441+
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
1442+
hasFloatVersion(Name))
14331443
Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
14341444

14351445
// Evaluate special cases related to the base.
@@ -1438,6 +1448,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
14381448
if (match(Base, m_FPOne()))
14391449
return Base;
14401450

1451+
// powf(x, sitofp(e)) -> powi(x, e)
1452+
// powf(x, uitofp(e)) -> powi(x, e)
1453+
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
1454+
Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
1455+
Value *NewExpo = nullptr;
1456+
unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
1457+
if (isa<SIToFPInst>(Expo) && BitWidth == 32)
1458+
NewExpo = IntExpo;
1459+
else if (BitWidth < 32)
1460+
NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
1461+
: B.CreateZExt(IntExpo, B.getInt32Ty());
1462+
if (NewExpo)
1463+
return createPowWithIntegerExponent(Base, NewExpo, M, B);
1464+
}
1465+
14411466
if (Value *Exp = replacePowWithExp(Pow, B))
14421467
return Exp;
14431468

@@ -1449,7 +1474,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
14491474

14501475
// pow(x, 0.0) -> 1.0
14511476
if (match(Expo, m_SpecificFP(0.0)))
1452-
return ConstantFP::get(Ty, 1.0);
1477+
return ConstantFP::get(Ty, 1.0);
14531478

14541479
// pow(x, 1.0) -> x
14551480
if (match(Expo, m_FPOne()))
@@ -1462,9 +1487,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
14621487
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
14631488
return Sqrt;
14641489

1490+
if (!AllowApprox)
1491+
return Shrunk;
1492+
14651493
// pow(x, n) -> x * x * x * ...
14661494
const APFloat *ExpoF;
1467-
if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
1495+
if (match(Expo, m_APFloat(ExpoF))) {
14681496
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
14691497
// If the exponent is an integer+0.5 we generate a call to sqrt and an
14701498
// additional fmul.
@@ -1488,9 +1516,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
14881516
if (!Expo2.isInteger())
14891517
return nullptr;
14901518

1491-
Sqrt =
1492-
getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
1493-
Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
1519+
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
1520+
Pow->doesNotAccessMemory(), M, B, TLI);
14941521
}
14951522

14961523
// We will memoize intermediate products of the Addition Chain.
@@ -1513,6 +1540,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
15131540

15141541
return FMul;
15151542
}
1543+
1544+
APSInt IntExpo(32, /*isUnsigned=*/false);
1545+
// powf(x, C) -> powi(x, C) iff C is a constant signed integer value
1546+
if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
1547+
APFloat::opOK) {
1548+
return createPowWithIntegerExponent(
1549+
Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
1550+
}
15161551
}
15171552

15181553
return Shrunk;
@@ -3101,4 +3136,4 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
31013136

31023137
FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
31033138
const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
3104-
: TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
3139+
: TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}

llvm/test/Transforms/InstCombine/pow-4.ll

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ declare double @pow(double, double)
1111
; pow(x, 3.0)
1212
define double @test_simplify_3(double %x) {
1313
; CHECK-LABEL: @test_simplify_3(
14-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
15-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]]
16-
; CHECK-NEXT: ret double [[TMP2]]
14+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
15+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]]
16+
; CHECK-NEXT: ret double [[TMP1]]
1717
;
1818
%1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
1919
ret double %1
@@ -22,9 +22,9 @@ define double @test_simplify_3(double %x) {
2222
; powf(x, 4.0)
2323
define float @test_simplify_4f(float %x) {
2424
; CHECK-LABEL: @test_simplify_4f(
25-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
26-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
27-
; CHECK-NEXT: ret float [[TMP2]]
25+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
26+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
27+
; CHECK-NEXT: ret float [[TMP1]]
2828
;
2929
%1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
3030
ret float %1
@@ -33,9 +33,9 @@ define float @test_simplify_4f(float %x) {
3333
; pow(x, 4.0)
3434
define double @test_simplify_4(double %x) {
3535
; CHECK-LABEL: @test_simplify_4(
36-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
37-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
38-
; CHECK-NEXT: ret double [[TMP2]]
36+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
37+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
38+
; CHECK-NEXT: ret double [[TMP1]]
3939
;
4040
%1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
4141
ret double %1
@@ -44,12 +44,12 @@ define double @test_simplify_4(double %x) {
4444
; powf(x, <15.0, 15.0>)
4545
define <2 x float> @test_simplify_15(<2 x float> %x) {
4646
; CHECK-LABEL: @test_simplify_15(
47-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
48-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]]
47+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
48+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]]
49+
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]]
4950
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]]
50-
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]]
51-
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]]
52-
; CHECK-NEXT: ret <2 x float> [[TMP5]]
51+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
52+
; CHECK-NEXT: ret <2 x float> [[TMP4]]
5353
;
5454
%1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.500000e+01, float 1.500000e+01>)
5555
ret <2 x float> %1
@@ -58,12 +58,12 @@ define <2 x float> @test_simplify_15(<2 x float> %x) {
5858
; pow(x, -7.0)
5959
define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
6060
; CHECK-LABEL: @test_simplify_neg_7(
61-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
62-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]]
63-
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]]
64-
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]]
65-
; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
66-
; CHECK-NEXT: ret <2 x double> [[TMP5]]
61+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
62+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
63+
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
64+
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
65+
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP3]]
66+
; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]]
6767
;
6868
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -7.000000e+00, double -7.000000e+00>)
6969
ret <2 x double> %1
@@ -72,14 +72,14 @@ define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
7272
; powf(x, -19.0)
7373
define float @test_simplify_neg_19(float %x) {
7474
; CHECK-LABEL: @test_simplify_neg_19(
75-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
75+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
76+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
7677
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
7778
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
78-
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
79-
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]]
80-
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]]
81-
; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
82-
; CHECK-NEXT: ret float [[TMP7]]
79+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]]
80+
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]]
81+
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]]
82+
; CHECK-NEXT: ret float [[RECIPROCAL]]
8383
;
8484
%1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01)
8585
ret float %1
@@ -98,12 +98,12 @@ define double @test_simplify_11_23(double %x) {
9898
; powf(x, 32.0)
9999
define float @test_simplify_32(float %x) {
100100
; CHECK-LABEL: @test_simplify_32(
101-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
101+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
102+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
102103
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
103104
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
104105
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
105-
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]]
106-
; CHECK-NEXT: ret float [[TMP5]]
106+
; CHECK-NEXT: ret float [[TMP4]]
107107
;
108108
%1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01)
109109
ret float %1
@@ -112,7 +112,7 @@ define float @test_simplify_32(float %x) {
112112
; pow(x, 33.0)
113113
define double @test_simplify_33(double %x) {
114114
; CHECK-LABEL: @test_simplify_33(
115-
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 3.300000e+01)
115+
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[X:%.*]], i32 33)
116116
; CHECK-NEXT: ret double [[TMP1]]
117117
;
118118
%1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
@@ -122,8 +122,8 @@ define double @test_simplify_33(double %x) {
122122
; pow(x, 16.5) with double
123123
define double @test_simplify_16_5(double %x) {
124124
; CHECK-LABEL: @test_simplify_16_5(
125-
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
126-
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
125+
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
126+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
127127
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
128128
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
129129
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
@@ -137,8 +137,8 @@ define double @test_simplify_16_5(double %x) {
137137
; pow(x, -16.5) with double
138138
define double @test_simplify_neg_16_5(double %x) {
139139
; CHECK-LABEL: @test_simplify_neg_16_5(
140-
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
141-
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
140+
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
141+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
142142
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
143143
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
144144
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
@@ -214,10 +214,10 @@ define <2 x double> @test_simplify_7_5(<2 x double> %x) {
214214
define <4 x float> @test_simplify_3_5(<4 x float> %x) {
215215
; CHECK-LABEL: @test_simplify_3_5(
216216
; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
217-
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[X]], [[X]]
218-
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[X]]
219-
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[SQRT]]
220-
; CHECK-NEXT: ret <4 x float> [[TMP3]]
217+
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]]
218+
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]]
219+
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]]
220+
; CHECK-NEXT: ret <4 x float> [[TMP2]]
221221
;
222222
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 3.500000e+00, float 3.500000e+00, float 3.500000e+00, float 3.500000e+00>)
223223
ret <4 x float> %1

0 commit comments

Comments
 (0)