Skip to content

Commit e3dd684

Browse files
committed
[ARMv8] Add CodeGen for VMAXNM/VMINNM.
llvm-svn: 189103
1 parent f438cb7 commit e3dd684

File tree

5 files changed

+77
-6
lines changed

5 files changed

+77
-6
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
10691069
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
10701070
case ARMISD::FMAX: return "ARMISD::FMAX";
10711071
case ARMISD::FMIN: return "ARMISD::FMIN";
1072+
case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
1073+
case ARMISD::VMINNM: return "ARMISD::VMINNM";
10721074
case ARMISD::BFI: return "ARMISD::BFI";
10731075
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
10741076
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
@@ -3276,6 +3278,20 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
32763278
// Try to generate VSEL on ARMv8.
32773279
if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
32783280
TrueVal.getValueType() == MVT::f64)) {
3281+
// We can select VMAXNM/VMINNM from a compare followed by a select with the
3282+
// same operands, as follows:
3283+
// c = fcmp [ogt, olt, ugt, ult] a, b
3284+
// select c, a, b
3285+
// We only do this in unsafe-fp-math, because signed zeros and NaNs are
3286+
// handled differently than the original code sequence.
3287+
if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
3288+
RHS == FalseVal) {
3289+
if (CC == ISD::SETOGT || CC == ISD::SETUGT)
3290+
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
3291+
if (CC == ISD::SETOLT || CC == ISD::SETULT)
3292+
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
3293+
}
3294+
32793295
bool swpCmpOps = false;
32803296
bool swpVselOps = false;
32813297
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ namespace llvm {
186186
// Floating-point max and min:
187187
FMAX,
188188
FMIN,
189+
VMAXNM,
190+
VMINNM,
189191

190192
// Bit-field insert
191193
BFI,

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
7171
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
7272
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
7373

74+
def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
75+
def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
76+
7477
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
7578
[SDTCisSameAs<0, 2>,
7679
SDTCisSameAs<0, 3>,
@@ -174,9 +177,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
174177
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
175178
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
176179

177-
178180
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
179181

182+
def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
183+
def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
184+
180185
//===----------------------------------------------------------------------===//
181186
// ARM Instruction Predicate Definitions.
182187
//

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,22 +356,24 @@ defm VSELGE : vsel_inst<"ge", 0b10, 10>;
356356
defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
357357
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
358358

359-
multiclass vmaxmin_inst<string op, bit opc> {
359+
multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
360360
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
361361
def S : ASbInp<0b11101, 0b00, opc,
362362
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
363363
NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
364-
[]>, Requires<[HasV8FP]>;
364+
[(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
365+
Requires<[HasV8FP]>;
365366

366367
def D : ADbInp<0b11101, 0b00, opc,
367368
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
368369
NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
369-
[]>, Requires<[HasV8FP]>;
370+
[(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
371+
Requires<[HasV8FP]>;
370372
}
371373
}
372374

373-
defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>;
374-
defm VMINNM : vmaxmin_inst<"vminnm", 1>;
375+
defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
376+
defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
375377

376378
// Match reassociated forms only if not sign dependent rounding.
377379
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),

llvm/test/CodeGen/ARM/vminmaxnm.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s
2+
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+v8fp -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
23

34
define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
45
; CHECK: vmaxnmq
@@ -36,6 +37,51 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
3637
ret <2 x float> %tmp3
3738
}
3839

40+
define float @v8fp_vminnm_o(float %a, float %b) {
41+
; CHECK-FAST: v8fp_vminnm_o
42+
; CHECK-FAST-NOT: vcmp
43+
; CHECK-FAST: vminnm.f32
44+
; CHECK: v8fp_vminnm_o
45+
; CHECK-NOT: vminnm.f32
46+
%cmp = fcmp olt float %a, %b
47+
%cond = select i1 %cmp, float %a, float %b
48+
ret float %cond
49+
}
50+
51+
define float @v8fp_vminnm_u(float %a, float %b) {
52+
; CHECK-FAST: v8fp_vminnm_u
53+
; CHECK-FAST-NOT: vcmp
54+
; CHECK-FAST: vminnm.f32
55+
; CHECK: v8fp_vminnm_u
56+
; CHECK-NOT: vminnm.f32
57+
%cmp = fcmp ult float %a, %b
58+
%cond = select i1 %cmp, float %a, float %b
59+
ret float %cond
60+
}
61+
62+
define float @v8fp_vmaxnm_o(float %a, float %b) {
63+
; CHECK-FAST: v8fp_vmaxnm_o
64+
; CHECK-FAST-NOT: vcmp
65+
; CHECK-FAST: vmaxnm.f32
66+
; CHECK: v8fp_vmaxnm_o
67+
; CHECK-NOT: vmaxnm.f32
68+
%cmp = fcmp ogt float %a, %b
69+
%cond = select i1 %cmp, float %a, float %b
70+
ret float %cond
71+
}
72+
73+
define float @v8fp_vmaxnm_u(float %a, float %b) {
74+
; CHECK-FAST: v8fp_vmaxnm_u
75+
; CHECK-FAST-NOT: vcmp
76+
; CHECK-FAST: vmaxnm.f32
77+
; CHECK: v8fp_vmaxnm_u
78+
; CHECK-NOT: vmaxnm.f32
79+
%cmp = fcmp ugt float %a, %b
80+
%cond = select i1 %cmp, float %a, float %b
81+
ret float %cond
82+
}
83+
84+
3985
declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
4086
declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
4187
declare <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone

0 commit comments

Comments
 (0)