Skip to content

Commit cac1151

Browse files
committed
[AArch64][GlobalISel] Overhaul legalization & isel of shifts to select immediate forms.
There are two main issues preventing us from generating immediate form shifts: 1) We have partial SelectionDAG imported support for G_ASHR and G_LSHR shift immediate forms, but they currently don't work because the amount type is expected to be an s64 constant, whereas we only legalize them to have homogeneous types. To deal with this, first we introduce a custom legalizer to *only* custom legalize s32 shifts which have a constant shift amount, extending that amount to an s64. There is also an additional artifact combiner to fold zext(g_constant) to a larger G_CONSTANT if it's legal, a counterpart to the anyext version committed in an earlier patch. 2) For G_SHL the importer can't cope with the pattern. For this I introduced an early selection phase in the arm64 selector to select these forms manually before the tablegen selector pessimizes it to a register-register variant. Differential Revision: https://reviews.llvm.org/D63910 llvm-svn: 364994
1 parent dfdccbb commit cac1151

17 files changed

+678
-84
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,9 @@ class LegalizationArtifactCombiner {
6969
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
7070
const LLT &DstTy = MRI.getType(DstReg);
7171
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
72-
auto CstVal = SrcMI->getOperand(1);
73-
APInt Val = CstVal.isImm()
74-
? APInt(DstTy.getSizeInBits(), CstVal.getImm())
75-
: CstVal.getCImm()->getValue();
76-
Val = Val.sext(DstTy.getSizeInBits());
77-
Builder.buildConstant(DstReg, Val);
72+
auto &CstVal = SrcMI->getOperand(1);
73+
Builder.buildConstant(
74+
DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
7875
markInstAndDefDead(MI, *SrcMI, DeadInsts);
7976
return true;
8077
}
@@ -108,6 +105,20 @@ class LegalizationArtifactCombiner {
108105
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
109106
return true;
110107
}
108+
109+
// Try to fold zext(g_constant) when the larger constant type is legal.
110+
// Can't use MIPattern because we don't have a specific constant in mind.
111+
auto *SrcMI = MRI.getVRegDef(SrcReg);
112+
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
113+
const LLT &DstTy = MRI.getType(DstReg);
114+
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
115+
auto &CstVal = SrcMI->getOperand(1);
116+
Builder.buildConstant(
117+
DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
118+
markInstAndDefDead(MI, *SrcMI, DeadInsts);
119+
return true;
120+
}
121+
}
111122
return tryFoldImplicitDef(MI, DeadInsts);
112123
}
113124

llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
110110
// 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
111111
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
112112
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
113+
// 49: Shift scalar with 64 bit shift imm
114+
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
115+
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
116+
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
113117
};
114118

115119
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 179 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ class AArch64InstructionSelector : public InstructionSelector {
5959
/// the patterns that don't require complex C++.
6060
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
6161

62+
// A lowering phase that runs before any selection attempts.
63+
64+
void preISelLower(MachineInstr &I) const;
65+
66+
// An early selection function that runs before the selectImpl() call.
67+
bool earlySelect(MachineInstr &I) const;
68+
69+
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
70+
6271
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
6372
MachineRegisterInfo &MRI) const;
6473
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
@@ -136,6 +145,14 @@ class AArch64InstructionSelector : public InstructionSelector {
136145
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
137146
MachineIRBuilder &MIRBuilder) const;
138147

148+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
149+
// We use these manually instead of using the importer since it doesn't
150+
// support SDNodeXForm.
151+
ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
152+
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
153+
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
154+
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
155+
139156
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
140157

141158
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
@@ -1050,6 +1067,98 @@ void AArch64InstructionSelector::materializeLargeCMVal(
10501067
return;
10511068
}
10521069

1070+
void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1071+
MachineBasicBlock &MBB = *I.getParent();
1072+
MachineFunction &MF = *MBB.getParent();
1073+
MachineRegisterInfo &MRI = MF.getRegInfo();
1074+
1075+
switch (I.getOpcode()) {
1076+
case TargetOpcode::G_SHL:
1077+
case TargetOpcode::G_ASHR:
1078+
case TargetOpcode::G_LSHR: {
1079+
// These shifts are legalized to have 64 bit shift amounts because we want
1080+
// to take advantage of the existing imported selection patterns that assume
1081+
// the immediates are s64s. However, if the shifted type is 32 bits and for
1082+
// some reason we receive input GMIR that has an s64 shift amount that's not
1083+
// a G_CONSTANT, insert a truncate so that we can still select the s32
1084+
// register-register variant.
1085+
unsigned SrcReg = I.getOperand(1).getReg();
1086+
unsigned ShiftReg = I.getOperand(2).getReg();
1087+
const LLT ShiftTy = MRI.getType(ShiftReg);
1088+
const LLT SrcTy = MRI.getType(SrcReg);
1089+
if (SrcTy.isVector())
1090+
return;
1091+
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1092+
if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1093+
return;
1094+
auto *AmtMI = MRI.getVRegDef(ShiftReg);
1095+
assert(AmtMI && "could not find a vreg definition for shift amount");
1096+
if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1097+
// Insert a subregister copy to implement a 64->32 trunc
1098+
MachineIRBuilder MIB(I);
1099+
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1100+
.addReg(ShiftReg, 0, AArch64::sub_32);
1101+
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1102+
I.getOperand(2).setReg(Trunc.getReg(0));
1103+
}
1104+
return;
1105+
}
1106+
default:
1107+
return;
1108+
}
1109+
}
1110+
1111+
bool AArch64InstructionSelector::earlySelectSHL(
1112+
MachineInstr &I, MachineRegisterInfo &MRI) const {
1113+
// We try to match the immediate variant of LSL, which is actually an alias
1114+
// for a special case of UBFM. Otherwise, we fall back to the imported
1115+
// selector which will match the register variant.
1116+
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1117+
const auto &MO = I.getOperand(2);
1118+
auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1119+
if (!VRegAndVal)
1120+
return false;
1121+
1122+
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1123+
if (DstTy.isVector())
1124+
return false;
1125+
bool Is64Bit = DstTy.getSizeInBits() == 64;
1126+
auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1127+
auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1128+
MachineIRBuilder MIB(I);
1129+
1130+
if (!Imm1Fn || !Imm2Fn)
1131+
return false;
1132+
1133+
auto NewI =
1134+
MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1135+
{I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1136+
1137+
for (auto &RenderFn : *Imm1Fn)
1138+
RenderFn(NewI);
1139+
for (auto &RenderFn : *Imm2Fn)
1140+
RenderFn(NewI);
1141+
1142+
I.eraseFromParent();
1143+
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1144+
}
1145+
1146+
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1147+
assert(I.getParent() && "Instruction should be in a basic block!");
1148+
assert(I.getParent()->getParent() && "Instruction should be in a function!");
1149+
1150+
MachineBasicBlock &MBB = *I.getParent();
1151+
MachineFunction &MF = *MBB.getParent();
1152+
MachineRegisterInfo &MRI = MF.getRegInfo();
1153+
1154+
switch (I.getOpcode()) {
1155+
case TargetOpcode::G_SHL:
1156+
return earlySelectSHL(I, MRI);
1157+
default:
1158+
return false;
1159+
}
1160+
}
1161+
10531162
bool AArch64InstructionSelector::select(MachineInstr &I,
10541163
CodeGenCoverage &CoverageInfo) const {
10551164
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -1107,6 +1216,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
11071216
return false;
11081217
}
11091218

1219+
// Try to do some lowering before we start instruction selecting. These
1220+
// lowerings are purely transformations on the input G_MIR and so selection
1221+
// must continue after any modification of the instruction.
1222+
preISelLower(I);
1223+
1224+
// There may be patterns where the importer can't deal with them optimally,
1225+
// but does select it to a suboptimal sequence so our custom C++ selection
1226+
// code later never has a chance to work on it. Therefore, we have an early
1227+
// selection attempt here to give priority to certain selection routines
1228+
// over the imported ones.
1229+
if (earlySelect(I))
1230+
return true;
1231+
11101232
if (selectImpl(I, CoverageInfo))
11111233
return true;
11121234

@@ -3644,21 +3766,11 @@ bool AArch64InstructionSelector::selectIntrinsic(
36443766
return false;
36453767
}
36463768

3647-
/// SelectArithImmed - Select an immediate value that can be represented as
3648-
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
3649-
/// Val set to the 12-bit value and Shift set to the shifter operand.
3650-
InstructionSelector::ComplexRendererFns
3651-
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3652-
MachineInstr &MI = *Root.getParent();
3653-
MachineBasicBlock &MBB = *MI.getParent();
3654-
MachineFunction &MF = *MBB.getParent();
3655-
MachineRegisterInfo &MRI = MF.getRegInfo();
3656-
3657-
// This function is called from the addsub_shifted_imm ComplexPattern,
3658-
// which lists [imm] as the list of opcode it's interested in, however
3659-
// we still need to check whether the operand is actually an immediate
3660-
// here because the ComplexPattern opcode list is only used in
3661-
// root-level opcode matching.
3769+
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
3770+
auto &MI = *Root.getParent();
3771+
auto &MBB = *MI.getParent();
3772+
auto &MF = *MBB.getParent();
3773+
auto &MRI = MF.getRegInfo();
36623774
uint64_t Immed;
36633775
if (Root.isImm())
36643776
Immed = Root.getImm();
@@ -3674,7 +3786,59 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
36743786
Immed = Op1.getCImm()->getZExtValue();
36753787
} else
36763788
return None;
3789+
return Immed;
3790+
}
3791+
3792+
InstructionSelector::ComplexRendererFns
3793+
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
3794+
auto MaybeImmed = getImmedFromMO(Root);
3795+
if (MaybeImmed == None || *MaybeImmed > 31)
3796+
return None;
3797+
uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
3798+
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3799+
}
3800+
3801+
InstructionSelector::ComplexRendererFns
3802+
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
3803+
auto MaybeImmed = getImmedFromMO(Root);
3804+
if (MaybeImmed == None || *MaybeImmed > 31)
3805+
return None;
3806+
uint64_t Enc = 31 - *MaybeImmed;
3807+
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3808+
}
3809+
3810+
InstructionSelector::ComplexRendererFns
3811+
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
3812+
auto MaybeImmed = getImmedFromMO(Root);
3813+
if (MaybeImmed == None || *MaybeImmed > 63)
3814+
return None;
3815+
uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
3816+
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3817+
}
3818+
3819+
InstructionSelector::ComplexRendererFns
3820+
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
3821+
auto MaybeImmed = getImmedFromMO(Root);
3822+
if (MaybeImmed == None || *MaybeImmed > 63)
3823+
return None;
3824+
uint64_t Enc = 63 - *MaybeImmed;
3825+
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3826+
}
36773827

3828+
/// SelectArithImmed - Select an immediate value that can be represented as
3829+
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
3830+
/// Val set to the 12-bit value and Shift set to the shifter operand.
3831+
InstructionSelector::ComplexRendererFns
3832+
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3833+
// This function is called from the addsub_shifted_imm ComplexPattern,
3834+
// which lists [imm] as the list of opcode it's interested in, however
3835+
// we still need to check whether the operand is actually an immediate
3836+
// here because the ComplexPattern opcode list is only used in
3837+
// root-level opcode matching.
3838+
auto MaybeImmed = getImmedFromMO(Root);
3839+
if (MaybeImmed == None)
3840+
return None;
3841+
uint64_t Immed = *MaybeImmed;
36783842
unsigned ShiftAmt;
36793843

36803844
if (Immed >> 12 == 0) {

llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
109109
.scalarize(0);
110110

111111
getActionDefinitionsBuilder({G_LSHR, G_ASHR})
112-
.legalFor({{s32, s32}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
112+
.customIf([=](const LegalityQuery &Query) {
113+
const auto &SrcTy = Query.Types[0];
114+
const auto &AmtTy = Query.Types[1];
115+
return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
116+
AmtTy.getSizeInBits() == 32;
117+
})
118+
.legalFor(
119+
{{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
113120
.clampScalar(1, s32, s64)
114121
.clampScalar(0, s32, s64)
115122
.minScalarSameAs(1, 0);
@@ -601,11 +608,39 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
601608
case TargetOpcode::G_LOAD:
602609
case TargetOpcode::G_STORE:
603610
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
611+
case TargetOpcode::G_SHL:
612+
case TargetOpcode::G_ASHR:
613+
case TargetOpcode::G_LSHR:
614+
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
604615
}
605616

606617
llvm_unreachable("expected switch to return");
607618
}
608619

620+
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
621+
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
622+
GISelChangeObserver &Observer) const {
623+
assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
624+
MI.getOpcode() == TargetOpcode::G_LSHR ||
625+
MI.getOpcode() == TargetOpcode::G_SHL);
626+
// If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
627+
// imported patterns can select it later. Either way, it will be legal.
628+
Register AmtReg = MI.getOperand(2).getReg();
629+
auto *CstMI = MRI.getVRegDef(AmtReg);
630+
assert(CstMI && "expected to find a vreg def");
631+
if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
632+
return true;
633+
// Check the shift amount is in range for an immediate form.
634+
unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
635+
if (Amount > 31)
636+
return true; // This will have to remain a register variant.
637+
assert(MRI.getType(AmtReg).getSizeInBits() == 32);
638+
MIRBuilder.setInstr(MI);
639+
auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
640+
MI.getOperand(2).setReg(ExtCst.getReg(0));
641+
return true;
642+
}
643+
609644
bool AArch64LegalizerInfo::legalizeLoadStore(
610645
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
611646
GISelChangeObserver &Observer) const {

llvm/lib/Target/AArch64/AArch64LegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ class AArch64LegalizerInfo : public LegalizerInfo {
3737
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
3838
MachineIRBuilder &MIRBuilder,
3939
GISelChangeObserver &Observer) const;
40+
bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
41+
MachineIRBuilder &MIRBuilder,
42+
GISelChangeObserver &Observer) const;
4043
};
4144
} // End llvm namespace.
4245
#endif

llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -537,10 +537,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
537537
case TargetOpcode::G_AND:
538538
case TargetOpcode::G_OR:
539539
case TargetOpcode::G_XOR:
540-
// Shifts.
541-
case TargetOpcode::G_SHL:
542-
case TargetOpcode::G_LSHR:
543-
case TargetOpcode::G_ASHR:
544540
// Floating point ops.
545541
case TargetOpcode::G_FADD:
546542
case TargetOpcode::G_FSUB:
@@ -554,6 +550,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
554550
DefaultMappingID, /*Cost*/ 1,
555551
getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
556552
/*NumOperands*/ 2);
553+
}
554+
// Shifts.
555+
case TargetOpcode::G_SHL:
556+
case TargetOpcode::G_LSHR:
557+
case TargetOpcode::G_ASHR: {
558+
LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
559+
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
560+
if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
561+
return getInstructionMapping(DefaultMappingID, 1,
562+
&ValMappings[Shift64Imm], 3);
563+
return getSameKindOfOperandsMapping(MI);
557564
}
558565
case TargetOpcode::COPY: {
559566
unsigned DstReg = MI.getOperand(0).getReg();

llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class AArch64GenRegisterBankInfo : public RegisterBankInfo {
5757
FPExt16To64Idx = 43,
5858
FPExt32To64Idx = 45,
5959
FPExt64To128Idx = 47,
60+
Shift64Imm = 49
6061
};
6162

6263
static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,

0 commit comments

Comments
 (0)