Skip to content

Commit 2e2bbca

Browse files
authored
AMDGPU/GlobalISel: Start legalizing minimumnum and maximumnum (llvm#140900)
This is the bare minimum needed to get the intrinsics to compile for AMDGPU, and it is not optimal. We need to more closely follow the existing G_FMINNUM/G_FMAXNUM handling, with custom lowering to handle the IEEE=0 case better. For now, just reuse the existing lowering for the old G_FMINNUM/G_FMAXNUM semantics. This does not change the treatment of G_FMINNUM/G_FMAXNUM, nor does it try to handle the general expansion without an underlying min/max variant (or with G_FMINIMUM/G_FMAXIMUM).
1 parent b263c08 commit 2e2bbca

File tree

4 files changed

+15219
-7751
lines changed

4 files changed

+15219
-7751
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3221,6 +3221,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
32213221
case TargetOpcode::G_FMAXNUM_IEEE:
32223222
case TargetOpcode::G_FMINIMUM:
32233223
case TargetOpcode::G_FMAXIMUM:
3224+
case TargetOpcode::G_FMINIMUMNUM:
3225+
case TargetOpcode::G_FMAXIMUMNUM:
32243226
case TargetOpcode::G_FDIV:
32253227
case TargetOpcode::G_FREM:
32263228
case TargetOpcode::G_FCEIL:
@@ -4591,6 +4593,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
45914593
return lowerFCopySign(MI);
45924594
case G_FMINNUM:
45934595
case G_FMAXNUM:
4596+
case G_FMINIMUMNUM:
4597+
case G_FMAXIMUMNUM:
45944598
return lowerFMinNumMaxNum(MI);
45954599
case G_MERGE_VALUES:
45964600
return lowerMergeValues(MI);
@@ -5379,6 +5383,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
53795383
case G_FMAXNUM_IEEE:
53805384
case G_FMINIMUM:
53815385
case G_FMAXIMUM:
5386+
case G_FMINIMUMNUM:
5387+
case G_FMAXIMUMNUM:
53825388
case G_FSHL:
53835389
case G_FSHR:
53845390
case G_ROTL:
@@ -6090,6 +6096,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
60906096
case TargetOpcode::G_FMAXNUM_IEEE:
60916097
case TargetOpcode::G_FMINIMUM:
60926098
case TargetOpcode::G_FMAXIMUM:
6099+
case TargetOpcode::G_FMINIMUMNUM:
6100+
case TargetOpcode::G_FMAXIMUMNUM:
60936101
case TargetOpcode::G_STRICT_FADD:
60946102
case TargetOpcode::G_STRICT_FSUB:
60956103
case TargetOpcode::G_STRICT_FMUL:
@@ -8139,8 +8147,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
81398147

81408148
LegalizerHelper::LegalizeResult
81418149
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8142-
unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8143-
TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8150+
// FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8151+
// identical handling. fminimumnum/fmaximumnum also need a path that does not
8152+
// depend on fminnum/fmaxnum.
8153+
8154+
unsigned NewOp;
8155+
switch (MI.getOpcode()) {
8156+
case TargetOpcode::G_FMINNUM:
8157+
NewOp = TargetOpcode::G_FMINNUM_IEEE;
8158+
break;
8159+
case TargetOpcode::G_FMINIMUMNUM:
8160+
NewOp = TargetOpcode::G_FMINNUM;
8161+
break;
8162+
case TargetOpcode::G_FMAXNUM:
8163+
NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8164+
break;
8165+
case TargetOpcode::G_FMAXIMUMNUM:
8166+
NewOp = TargetOpcode::G_FMAXNUM;
8167+
break;
8168+
default:
8169+
llvm_unreachable("unexpected min/max opcode");
8170+
}
81448171

81458172
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
81468173
LLT Ty = MRI.getType(Dst);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
960960
auto &MinNumMaxNum = getActionDefinitionsBuilder({
961961
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
962962

963+
// TODO: These should be custom lowered and are directly legal with IEEE=0
964+
auto &MinimumNumMaximumNum =
965+
getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
966+
963967
if (ST.hasVOP3PInsts()) {
964968
MinNumMaxNum.customFor(FPTypesPK16)
965969
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
@@ -976,6 +980,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
976980
.scalarize(0);
977981
}
978982

983+
MinimumNumMaximumNum.lower();
984+
979985
if (ST.hasVOP3PInsts())
980986
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
981987

0 commit comments

Comments
 (0)