Skip to content

Commit 3906ae3

Browse files
author
QingShan Zhang
committed
[DAGCombine] Check the uses of negated floating constant and remove the hack
PowerPC hits an assertion due to somewhat the same reason as https://reviews.llvm.org/D70975. Though there are already some hack, it still failed with some case, when the operand 0 is NOT a const fp, it is another fma that with const fp. And that const fp is negated which result in multi-uses. A better fix is to check the uses of the negated const fp. If there are already use of its negated value, we will have benefit as no extra Node is added. Differential revision: https://reviews.llvm.org/D75501
1 parent ea6eb81 commit 3906ae3

File tree

2 files changed

+116
-11
lines changed

2 files changed

+116
-11
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5552,9 +5552,20 @@ TargetLowering::getNegatibleCost(SDValue Op, SelectionDAG &DAG,
55525552
EVT VT = Op.getValueType();
55535553
const SDNodeFlags Flags = Op->getFlags();
55545554
const TargetOptions &Options = DAG.getTarget().Options;
5555-
if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
5556-
isFPExtFree(VT, Op.getOperand(0).getValueType())))
5557-
return NegatibleCost::Expensive;
5555+
if (!Op.hasOneUse()) {
5556+
bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND &&
5557+
isFPExtFree(VT, Op.getOperand(0).getValueType());
5558+
5559+
// If we already have the use of the negated floating constant, it is free
5560+
// to negate it even it has multiple uses.
5561+
bool IsFreeConstant =
5562+
Op.getOpcode() == ISD::ConstantFP &&
5563+
!getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize)
5564+
.use_empty();
5565+
5566+
if (!IsFreeExtend && !IsFreeConstant)
5567+
return NegatibleCost::Expensive;
5568+
}
55585569

55595570
// Don't recurse exponentially.
55605571
if (Depth > SelectionDAG::MaxRecursionDepth)
@@ -5760,14 +5771,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
57605771
ForCodeSize, Depth + 1);
57615772
NegatibleCost V1 = getNegatibleCost(Op.getOperand(1), DAG, LegalOperations,
57625773
ForCodeSize, Depth + 1);
5763-
// TODO: This is a hack. It is possible that costs have changed between now
5764-
// and the initial calls to getNegatibleCost(). That is because we
5765-
// are rewriting the expression, and that may change the number of
5766-
// uses (and therefore the cost) of values. If the negation costs are
5767-
// equal, only negate this value if it is a constant. Otherwise, try
5768-
// operand 1. A better fix would eliminate uses as a cost factor or
5769-
// track the change in uses as we rewrite the expression.
5770-
if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
5774+
if (V0 > V1) {
57715775
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
57725776
SDValue Neg0 = getNegatedExpression(
57735777
Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);

llvm/test/CodeGen/PowerPC/fma-combine.ll

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,104 @@ entry:
137137
%add = fsub double %mul, %a
138138
ret double %add
139139
}
140+
141+
define float @fma_combine_no_ice() {
142+
; CHECK-FAST-LABEL: fma_combine_no_ice:
143+
; CHECK-FAST: # %bb.0:
144+
; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_0@toc@ha
145+
; CHECK-FAST-NEXT: addis 4, 2, .LCPI4_1@toc@ha
146+
; CHECK-FAST-NEXT: lfs 0, .LCPI4_0@toc@l(3)
147+
; CHECK-FAST-NEXT: lfsx 2, 0, 3
148+
; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_2@toc@ha
149+
; CHECK-FAST-NEXT: lfs 3, .LCPI4_1@toc@l(4)
150+
; CHECK-FAST-NEXT: lfs 1, .LCPI4_2@toc@l(3)
151+
; CHECK-FAST-NEXT: xsmaddasp 3, 2, 0
152+
; CHECK-FAST-NEXT: xsmaddasp 1, 2, 3
153+
; CHECK-FAST-NEXT: xsnmsubasp 1, 3, 2
154+
; CHECK-FAST-NEXT: blr
155+
;
156+
; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice:
157+
; CHECK-FAST-NOVSX: # %bb.0:
158+
; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
159+
; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI4_0@toc@l(3)
160+
; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_1@toc@ha
161+
; CHECK-FAST-NOVSX-NEXT: lfs 1, 0(3)
162+
; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_1@toc@l(3)
163+
; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_2@toc@ha
164+
; CHECK-FAST-NOVSX-NEXT: fmadds 0, 1, 2, 0
165+
; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_2@toc@l(3)
166+
; CHECK-FAST-NOVSX-NEXT: fmadds 2, 1, 0, 2
167+
; CHECK-FAST-NOVSX-NEXT: fnmsubs 1, 0, 1, 2
168+
; CHECK-FAST-NOVSX-NEXT: blr
169+
;
170+
; CHECK-LABEL: fma_combine_no_ice:
171+
; CHECK: # %bb.0:
172+
; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
173+
; CHECK-NEXT: addis 4, 2, .LCPI4_1@toc@ha
174+
; CHECK-NEXT: lfs 0, .LCPI4_0@toc@l(3)
175+
; CHECK-NEXT: lfsx 2, 0, 3
176+
; CHECK-NEXT: addis 3, 2, .LCPI4_2@toc@ha
177+
; CHECK-NEXT: lfs 3, .LCPI4_1@toc@l(4)
178+
; CHECK-NEXT: lfs 1, .LCPI4_2@toc@l(3)
179+
; CHECK-NEXT: xsmaddasp 3, 2, 0
180+
; CHECK-NEXT: xsmaddasp 1, 2, 3
181+
; CHECK-NEXT: xsnmsubasp 1, 3, 2
182+
; CHECK-NEXT: blr
183+
%tmp = load float, float* undef, align 4
184+
%tmp2 = load float, float* undef, align 4
185+
%tmp3 = fmul fast float %tmp, 0x3FE372D780000000
186+
%tmp4 = fadd fast float %tmp3, 1.000000e+00
187+
%tmp5 = fmul fast float %tmp2, %tmp4
188+
%tmp6 = load float, float* undef, align 4
189+
%tmp7 = load float, float* undef, align 4
190+
%tmp8 = fmul fast float %tmp7, 0x3FE372D780000000
191+
%tmp9 = fsub fast float -1.000000e+00, %tmp8
192+
%tmp10 = fmul fast float %tmp9, %tmp6
193+
%tmp11 = fadd fast float %tmp5, 5.000000e-01
194+
%tmp12 = fadd fast float %tmp11, %tmp10
195+
ret float %tmp12
196+
}
197+
198+
; This would crash while trying getNegatedExpression().
199+
define double @getNegatedExpression_crash(double %x, double %y) {
200+
; CHECK-FAST-LABEL: getNegatedExpression_crash:
201+
; CHECK-FAST: # %bb.0:
202+
; CHECK-FAST-NEXT: addis 3, 2, .LCPI5_1@toc@ha
203+
; CHECK-FAST-NEXT: addis 4, 2, .LCPI5_0@toc@ha
204+
; CHECK-FAST-NEXT: lfs 3, .LCPI5_1@toc@l(3)
205+
; CHECK-FAST-NEXT: lfs 4, .LCPI5_0@toc@l(4)
206+
; CHECK-FAST-NEXT: xssubdp 0, 1, 3
207+
; CHECK-FAST-NEXT: xsmaddadp 3, 1, 4
208+
; CHECK-FAST-NEXT: xsmaddadp 0, 3, 2
209+
; CHECK-FAST-NEXT: fmr 1, 0
210+
; CHECK-FAST-NEXT: blr
211+
;
212+
; CHECK-FAST-NOVSX-LABEL: getNegatedExpression_crash:
213+
; CHECK-FAST-NOVSX: # %bb.0:
214+
; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha
215+
; CHECK-FAST-NOVSX-NEXT: addis 4, 2, .LCPI5_1@toc@ha
216+
; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI5_0@toc@l(3)
217+
; CHECK-FAST-NOVSX-NEXT: lfs 3, .LCPI5_1@toc@l(4)
218+
; CHECK-FAST-NOVSX-NEXT: fmadd 3, 1, 3, 0
219+
; CHECK-FAST-NOVSX-NEXT: fsub 0, 1, 0
220+
; CHECK-FAST-NOVSX-NEXT: fmadd 1, 3, 2, 0
221+
; CHECK-FAST-NOVSX-NEXT: blr
222+
;
223+
; CHECK-LABEL: getNegatedExpression_crash:
224+
; CHECK: # %bb.0:
225+
; CHECK-NEXT: addis 3, 2, .LCPI5_1@toc@ha
226+
; CHECK-NEXT: addis 4, 2, .LCPI5_0@toc@ha
227+
; CHECK-NEXT: lfs 3, .LCPI5_1@toc@l(3)
228+
; CHECK-NEXT: lfs 4, .LCPI5_0@toc@l(4)
229+
; CHECK-NEXT: xssubdp 0, 1, 3
230+
; CHECK-NEXT: xsmaddadp 3, 1, 4
231+
; CHECK-NEXT: xsmaddadp 0, 3, 2
232+
; CHECK-NEXT: fmr 1, 0
233+
; CHECK-NEXT: blr
234+
%neg = fneg fast double %x
235+
%fma = call fast double @llvm.fma.f64(double %neg, double 42.0, double -1.0)
236+
%add = fadd fast double %x, 1.0
237+
%fma1 = call fast double @llvm.fma.f64(double %fma, double %y, double %add)
238+
ret double %fma1
239+
}
240+
declare double @llvm.fma.f64(double, double, double) nounwind readnone

0 commit comments

Comments
 (0)