@@ -59,6 +59,15 @@ class AArch64InstructionSelector : public InstructionSelector {
59
59
// / the patterns that don't require complex C++.
60
60
bool selectImpl (MachineInstr &I, CodeGenCoverage &CoverageInfo) const ;
61
61
62
+ // A lowering phase that runs before any selection attempts.
63
+
64
+ void preISelLower (MachineInstr &I) const ;
65
+
66
+ // An early selection function that runs before the selectImpl() call.
67
+ bool earlySelect (MachineInstr &I) const ;
68
+
69
+ bool earlySelectSHL (MachineInstr &I, MachineRegisterInfo &MRI) const ;
70
+
62
71
bool selectVaStartAAPCS (MachineInstr &I, MachineFunction &MF,
63
72
MachineRegisterInfo &MRI) const ;
64
73
bool selectVaStartDarwin (MachineInstr &I, MachineFunction &MF,
@@ -136,6 +145,14 @@ class AArch64InstructionSelector : public InstructionSelector {
136
145
MachineInstr *emitCSetForICMP (Register DefReg, unsigned Pred,
137
146
MachineIRBuilder &MIRBuilder) const ;
138
147
148
+ // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
149
+ // We use these manually instead of using the importer since it doesn't
150
+ // support SDNodeXForm.
151
+ ComplexRendererFns selectShiftA_32 (const MachineOperand &Root) const ;
152
+ ComplexRendererFns selectShiftB_32 (const MachineOperand &Root) const ;
153
+ ComplexRendererFns selectShiftA_64 (const MachineOperand &Root) const ;
154
+ ComplexRendererFns selectShiftB_64 (const MachineOperand &Root) const ;
155
+
139
156
ComplexRendererFns selectArithImmed (MachineOperand &Root) const ;
140
157
141
158
ComplexRendererFns selectAddrModeUnscaled (MachineOperand &Root,
@@ -1050,6 +1067,98 @@ void AArch64InstructionSelector::materializeLargeCMVal(
1050
1067
return ;
1051
1068
}
1052
1069
1070
+ void AArch64InstructionSelector::preISelLower (MachineInstr &I) const {
1071
+ MachineBasicBlock &MBB = *I.getParent ();
1072
+ MachineFunction &MF = *MBB.getParent ();
1073
+ MachineRegisterInfo &MRI = MF.getRegInfo ();
1074
+
1075
+ switch (I.getOpcode ()) {
1076
+ case TargetOpcode::G_SHL:
1077
+ case TargetOpcode::G_ASHR:
1078
+ case TargetOpcode::G_LSHR: {
1079
+ // These shifts are legalized to have 64 bit shift amounts because we want
1080
+ // to take advantage of the existing imported selection patterns that assume
1081
+ // the immediates are s64s. However, if the shifted type is 32 bits and for
1082
+ // some reason we receive input GMIR that has an s64 shift amount that's not
1083
+ // a G_CONSTANT, insert a truncate so that we can still select the s32
1084
+ // register-register variant.
1085
+ unsigned SrcReg = I.getOperand (1 ).getReg ();
1086
+ unsigned ShiftReg = I.getOperand (2 ).getReg ();
1087
+ const LLT ShiftTy = MRI.getType (ShiftReg);
1088
+ const LLT SrcTy = MRI.getType (SrcReg);
1089
+ if (SrcTy.isVector ())
1090
+ return ;
1091
+ assert (!ShiftTy.isVector () && " unexpected vector shift ty" );
1092
+ if (SrcTy.getSizeInBits () != 32 || ShiftTy.getSizeInBits () != 64 )
1093
+ return ;
1094
+ auto *AmtMI = MRI.getVRegDef (ShiftReg);
1095
+ assert (AmtMI && " could not find a vreg definition for shift amount" );
1096
+ if (AmtMI->getOpcode () != TargetOpcode::G_CONSTANT) {
1097
+ // Insert a subregister copy to implement a 64->32 trunc
1098
+ MachineIRBuilder MIB (I);
1099
+ auto Trunc = MIB.buildInstr (TargetOpcode::COPY, {SrcTy}, {})
1100
+ .addReg (ShiftReg, 0 , AArch64::sub_32);
1101
+ MRI.setRegBank (Trunc.getReg (0 ), RBI.getRegBank (AArch64::GPRRegBankID));
1102
+ I.getOperand (2 ).setReg (Trunc.getReg (0 ));
1103
+ }
1104
+ return ;
1105
+ }
1106
+ default :
1107
+ return ;
1108
+ }
1109
+ }
1110
+
1111
+ bool AArch64InstructionSelector::earlySelectSHL (
1112
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
1113
+ // We try to match the immediate variant of LSL, which is actually an alias
1114
+ // for a special case of UBFM. Otherwise, we fall back to the imported
1115
+ // selector which will match the register variant.
1116
+ assert (I.getOpcode () == TargetOpcode::G_SHL && " unexpected op" );
1117
+ const auto &MO = I.getOperand (2 );
1118
+ auto VRegAndVal = getConstantVRegVal (MO.getReg (), MRI);
1119
+ if (!VRegAndVal)
1120
+ return false ;
1121
+
1122
+ const LLT DstTy = MRI.getType (I.getOperand (0 ).getReg ());
1123
+ if (DstTy.isVector ())
1124
+ return false ;
1125
+ bool Is64Bit = DstTy.getSizeInBits () == 64 ;
1126
+ auto Imm1Fn = Is64Bit ? selectShiftA_64 (MO) : selectShiftA_32 (MO);
1127
+ auto Imm2Fn = Is64Bit ? selectShiftB_64 (MO) : selectShiftB_32 (MO);
1128
+ MachineIRBuilder MIB (I);
1129
+
1130
+ if (!Imm1Fn || !Imm2Fn)
1131
+ return false ;
1132
+
1133
+ auto NewI =
1134
+ MIB.buildInstr (Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1135
+ {I.getOperand (0 ).getReg ()}, {I.getOperand (1 ).getReg ()});
1136
+
1137
+ for (auto &RenderFn : *Imm1Fn)
1138
+ RenderFn (NewI);
1139
+ for (auto &RenderFn : *Imm2Fn)
1140
+ RenderFn (NewI);
1141
+
1142
+ I.eraseFromParent ();
1143
+ return constrainSelectedInstRegOperands (*NewI, TII, TRI, RBI);
1144
+ }
1145
+
1146
+ bool AArch64InstructionSelector::earlySelect (MachineInstr &I) const {
1147
+ assert (I.getParent () && " Instruction should be in a basic block!" );
1148
+ assert (I.getParent ()->getParent () && " Instruction should be in a function!" );
1149
+
1150
+ MachineBasicBlock &MBB = *I.getParent ();
1151
+ MachineFunction &MF = *MBB.getParent ();
1152
+ MachineRegisterInfo &MRI = MF.getRegInfo ();
1153
+
1154
+ switch (I.getOpcode ()) {
1155
+ case TargetOpcode::G_SHL:
1156
+ return earlySelectSHL (I, MRI);
1157
+ default :
1158
+ return false ;
1159
+ }
1160
+ }
1161
+
1053
1162
bool AArch64InstructionSelector::select (MachineInstr &I,
1054
1163
CodeGenCoverage &CoverageInfo) const {
1055
1164
assert (I.getParent () && " Instruction should be in a basic block!" );
@@ -1107,6 +1216,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
1107
1216
return false ;
1108
1217
}
1109
1218
1219
+ // Try to do some lowering before we start instruction selecting. These
1220
+ // lowerings are purely transformations on the input G_MIR and so selection
1221
+ // must continue after any modification of the instruction.
1222
+ preISelLower (I);
1223
+
1224
+ // The importer handles some patterns only suboptimally: it still selects
1225
+ // them, but to a worse sequence, so our custom C++ selection code later
1226
+ // never gets a chance to work on them. Therefore, we have an early
1227
+ // selection attempt here to give priority to certain selection routines
1228
+ // over the imported ones.
1229
+ if (earlySelect (I))
1230
+ return true ;
1231
+
1110
1232
if (selectImpl (I, CoverageInfo))
1111
1233
return true ;
1112
1234
@@ -3644,21 +3766,11 @@ bool AArch64InstructionSelector::selectIntrinsic(
3644
3766
return false ;
3645
3767
}
3646
3768
3647
- // / SelectArithImmed - Select an immediate value that can be represented as
3648
- // / a 12-bit value shifted left by either 0 or 12. If so, return true with
3649
- // / Val set to the 12-bit value and Shift set to the shifter operand.
3650
- InstructionSelector::ComplexRendererFns
3651
- AArch64InstructionSelector::selectArithImmed (MachineOperand &Root) const {
3652
- MachineInstr &MI = *Root.getParent ();
3653
- MachineBasicBlock &MBB = *MI.getParent ();
3654
- MachineFunction &MF = *MBB.getParent ();
3655
- MachineRegisterInfo &MRI = MF.getRegInfo ();
3656
-
3657
- // This function is called from the addsub_shifted_imm ComplexPattern,
3658
- // which lists [imm] as the list of opcode it's interested in, however
3659
- // we still need to check whether the operand is actually an immediate
3660
- // here because the ComplexPattern opcode list is only used in
3661
- // root-level opcode matching.
3769
+ static Optional<uint64_t > getImmedFromMO (const MachineOperand &Root) {
3770
+ auto &MI = *Root.getParent ();
3771
+ auto &MBB = *MI.getParent ();
3772
+ auto &MF = *MBB.getParent ();
3773
+ auto &MRI = MF.getRegInfo ();
3662
3774
uint64_t Immed;
3663
3775
if (Root.isImm ())
3664
3776
Immed = Root.getImm ();
@@ -3674,7 +3786,59 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3674
3786
Immed = Op1.getCImm ()->getZExtValue ();
3675
3787
} else
3676
3788
return None;
3789
+ return Immed;
3790
+ }
3791
+
3792
+ InstructionSelector::ComplexRendererFns
3793
+ AArch64InstructionSelector::selectShiftA_32 (const MachineOperand &Root) const {
3794
+ auto MaybeImmed = getImmedFromMO (Root);
3795
+ if (MaybeImmed == None || *MaybeImmed > 31 )
3796
+ return None;
3797
+ uint64_t Enc = (32 - *MaybeImmed) & 0x1f ;
3798
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm (Enc); }}};
3799
+ }
3800
+
3801
+ InstructionSelector::ComplexRendererFns
3802
+ AArch64InstructionSelector::selectShiftB_32 (const MachineOperand &Root) const {
3803
+ auto MaybeImmed = getImmedFromMO (Root);
3804
+ if (MaybeImmed == None || *MaybeImmed > 31 )
3805
+ return None;
3806
+ uint64_t Enc = 31 - *MaybeImmed;
3807
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm (Enc); }}};
3808
+ }
3809
+
3810
+ InstructionSelector::ComplexRendererFns
3811
+ AArch64InstructionSelector::selectShiftA_64 (const MachineOperand &Root) const {
3812
+ auto MaybeImmed = getImmedFromMO (Root);
3813
+ if (MaybeImmed == None || *MaybeImmed > 63 )
3814
+ return None;
3815
+ uint64_t Enc = (64 - *MaybeImmed) & 0x3f ;
3816
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm (Enc); }}};
3817
+ }
3818
+
3819
+ InstructionSelector::ComplexRendererFns
3820
+ AArch64InstructionSelector::selectShiftB_64 (const MachineOperand &Root) const {
3821
+ auto MaybeImmed = getImmedFromMO (Root);
3822
+ if (MaybeImmed == None || *MaybeImmed > 63 )
3823
+ return None;
3824
+ uint64_t Enc = 63 - *MaybeImmed;
3825
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm (Enc); }}};
3826
+ }
3677
3827
3828
+ // / SelectArithImmed - Select an immediate value that can be represented as
3829
+ // / a 12-bit value shifted left by either 0 or 12. If so, return true with
3830
+ // / Val set to the 12-bit value and Shift set to the shifter operand.
3831
+ InstructionSelector::ComplexRendererFns
3832
+ AArch64InstructionSelector::selectArithImmed (MachineOperand &Root) const {
3833
+ // This function is called from the addsub_shifted_imm ComplexPattern,
3834
+ // which lists [imm] as the list of opcode it's interested in, however
3835
+ // we still need to check whether the operand is actually an immediate
3836
+ // here because the ComplexPattern opcode list is only used in
3837
+ // root-level opcode matching.
3838
+ auto MaybeImmed = getImmedFromMO (Root);
3839
+ if (MaybeImmed == None)
3840
+ return None;
3841
+ uint64_t Immed = *MaybeImmed;
3678
3842
unsigned ShiftAmt;
3679
3843
3680
3844
if (Immed >> 12 == 0 ) {
0 commit comments