Commit dcd02a5
[AArch64][GlobalISel] Fold away lowered vector sign-extend of vector compares.
This fixes a long-standing cause of awful code generation when legalization creates G_SEXT(G_FCMP(...)), for example due to promoting the condition of a vector G_SELECT. Since vector compares on AArch64 already sign-extend the condition value, there's no need for this extra G_SEXT. Unfortunately, by the time we get to post-legalization these G_SEXTs have already been lowered into shifts, so this combine is a bit more involved than I'd ideally like. Oh well.

Differential Revision: https://reviews.llvm.org/D135078
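For reference, the redundant pattern being folded looks like the following generic-MIR sketch (a minimal example modeled on the new MIR test added below; the register names %x, %y, %cmp, %shl, %sext and the ogt predicate are illustrative). A lowered sign-extend of an i1 vector condition is a shift-left followed by an arithmetic-shift-right, both by the scalar width minus one (31 for <4 x s32>); since G_FCMP/G_ICMP lanes are already all-ones or all-zeros on AArch64, both shifts are no-ops and the G_ASHR can simply be replaced by the compare's result:

  %cmp:_(<4 x s32>) = G_FCMP floatpred(ogt), %x(<4 x s32>), %y
  %c31:_(s32) = G_CONSTANT i32 31
  %amt:_(<4 x s32>) = G_BUILD_VECTOR %c31(s32), %c31(s32), %c31(s32), %c31(s32)
  %shl:_(<4 x s32>) = G_SHL %cmp, %amt(<4 x s32>)
  %sext:_(<4 x s32>) = G_ASHR %shl, %amt(<4 x s32>)  ; the combine rewrites uses of %sext to %cmp

The first two MIR tests below check exactly this rewrite; the wrong-shift and mismatched-shift tests check that the fold is skipped when the shift amounts are not both the scalar width minus one.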

7 files changed (+257 −774 lines)

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion

@@ -203,6 +203,13 @@ def split_store_zero_128 : GICombineRule<
   (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
 >;
 
+def sext_via_shifts_of_vcmp : GICombineRule<
+  (defs root:$d, register_matchinfo:$info),
+  (match (wip_match_opcode G_ASHR):$d,
+         [{ return matchSextViaShiftsOfVCmp(*${d}, MRI, ${info}); }]),
+  (apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${info}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -229,6 +236,6 @@ def AArch64PostLegalizerCombinerHelper
                         constant_fold, identity_combines,
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
-                        select_to_minmax]> {
+                        select_to_minmax, sext_via_shifts_of_vcmp]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 36 additions & 0 deletions

@@ -32,7 +32,9 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@@ -329,6 +331,40 @@ static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
   Store.eraseFromParent();
 }
 
+// Match a legalized vector sext of a vector compare. Vector compares always
+// sign-extend the low bit anyway. Unfortunately we have to match the G_SEXT
+// after it's been legalized to shifts since this is after legalization.
+static bool matchSextViaShiftsOfVCmp(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                     Register &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ASHR && "Expected G_ASHR");
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  if (!DstTy.isVector())
+    return false;
+
+  Register ShlLHS;
+  int64_t ShlRHS;
+  if (!mi_match(MI.getOperand(1).getReg(), MRI,
+                m_GShl(m_Reg(ShlLHS), m_ICstOrSplat(ShlRHS))))
+    return false;
+
+  // Check the shift amount is correct for a sext.
+  if (ShlRHS != DstTy.getScalarSizeInBits() - 1)
+    return false;
+  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstSplat(ShlRHS)))
+    return false;
+
+  // Check we're trying to extend a vector compare.
+  if (auto *Cmp = getOpcodeDef<GFCmp>(ShlLHS, MRI)) {
+    MatchInfo = Cmp->getReg(0);
+    return true;
+  }
+  if (auto *Cmp = getOpcodeDef<GICmp>(ShlLHS, MRI)) {
+    MatchInfo = Cmp->getReg(0);
+    return true;
+  }
+  return false;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 45 additions & 55 deletions

@@ -57,32 +57,30 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI1_4
-; GISEL-NEXT:    adrp x9, .LCPI1_0
+; GISEL-NEXT:    adrp x9, .LCPI1_5
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_4]
 ; GISEL-NEXT:    adrp x8, .LCPI1_3
-; GISEL-NEXT:    ldr q5, [x9, :lo12:.LCPI1_0]
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_3]
 ; GISEL-NEXT:    adrp x8, .LCPI1_2
 ; GISEL-NEXT:    ushl v1.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_2]
-; GISEL-NEXT:    adrp x8, .LCPI1_5
+; GISEL-NEXT:    adrp x8, .LCPI1_1
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
 ; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v3.8h, v2.8h
 ; GISEL-NEXT:    umull v2.4s, v3.4h, v2.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_5]
-; GISEL-NEXT:    adrp x8, .LCPI1_1
-; GISEL-NEXT:    cmeq v3.8h, v3.8h, v5.8h
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_1]
+; GISEL-NEXT:    adrp x8, .LCPI1_0
+; GISEL-NEXT:    neg v3.8h, v3.8h
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI1_1]
-; GISEL-NEXT:    shl v3.8h, v3.8h, #15
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI1_5]
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI1_0]
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT:    neg v2.8h, v4.8h
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    sshr v2.8h, v3.8h, #15
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v5.8h
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
@@ -108,25 +106,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI2_3
-; GISEL-NEXT:    adrp x9, .LCPI2_4
-; GISEL-NEXT:    adrp x10, .LCPI2_0
+; GISEL-NEXT:    adrp x9, .LCPI2_1
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI2_3]
 ; GISEL-NEXT:    adrp x8, .LCPI2_2
-; GISEL-NEXT:    ldr q4, [x10, :lo12:.LCPI2_0]
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI2_1]
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_2]
-; GISEL-NEXT:    adrp x8, .LCPI2_1
+; GISEL-NEXT:    adrp x8, .LCPI2_4
 ; GISEL-NEXT:    ushl v1.8h, v0.8h, v1.8h
+; GISEL-NEXT:    neg v4.8h, v4.8h
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
-; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI2_1]
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
-; GISEL-NEXT:    ldr q2, [x9, :lo12:.LCPI2_4]
-; GISEL-NEXT:    cmeq v2.8h, v2.8h, v4.8h
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_4]
+; GISEL-NEXT:    adrp x8, .LCPI2_0
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT:    neg v3.8h, v5.8h
-; GISEL-NEXT:    shl v2.8h, v2.8h, #15
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
-; GISEL-NEXT:    sshr v2.8h, v2.8h, #15
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI2_0]
+; GISEL-NEXT:    cmeq v2.8h, v2.8h, v3.8h
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v4.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@@ -151,23 +147,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI3_2
-; GISEL-NEXT:    adrp x9, .LCPI3_0
+; GISEL-NEXT:    adrp x9, .LCPI3_3
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI3_2]
-; GISEL-NEXT:    adrp x8, .LCPI3_3
-; GISEL-NEXT:    ldr q3, [x9, :lo12:.LCPI3_0]
+; GISEL-NEXT:    adrp x8, .LCPI3_1
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI3_3]
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_3]
-; GISEL-NEXT:    adrp x8, .LCPI3_1
-; GISEL-NEXT:    cmeq v2.8h, v2.8h, v3.8h
-; GISEL-NEXT:    sub v4.8h, v0.8h, v1.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_1]
-; GISEL-NEXT:    shl v2.8h, v2.8h, #15
-; GISEL-NEXT:    usra v1.8h, v4.8h, #1
-; GISEL-NEXT:    neg v3.8h, v3.8h
-; GISEL-NEXT:    sshr v2.8h, v2.8h, #15
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT:    adrp x8, .LCPI3_0
+; GISEL-NEXT:    neg v2.8h, v2.8h
+; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
+; GISEL-NEXT:    usra v1.8h, v3.8h, #1
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_0]
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@@ -197,21 +191,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI4_2
-; GISEL-NEXT:    adrp x9, .LCPI4_0
+; GISEL-NEXT:    adrp x9, .LCPI4_1
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI4_2]
 ; GISEL-NEXT:    adrp x8, .LCPI4_3
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI4_0]
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI4_1]
 ; GISEL-NEXT:    umull2 v2.8h, v0.16b, v1.16b
 ; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_3]
 ; GISEL-NEXT:    umull v1.8h, v0.8b, v1.8b
-; GISEL-NEXT:    adrp x8, .LCPI4_1
-; GISEL-NEXT:    cmeq v3.16b, v3.16b, v4.16b
+; GISEL-NEXT:    adrp x8, .LCPI4_0
+; GISEL-NEXT:    neg v4.16b, v4.16b
 ; GISEL-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
-; GISEL-NEXT:    shl v3.16b, v3.16b, #7
-; GISEL-NEXT:    neg v2.16b, v2.16b
-; GISEL-NEXT:    ushl v1.16b, v1.16b, v2.16b
-; GISEL-NEXT:    sshr v2.16b, v3.16b, #7
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT:    cmeq v2.16b, v3.16b, v2.16b
+; GISEL-NEXT:    ushl v1.16b, v1.16b, v4.16b
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -248,28 +240,26 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
 ; GISEL-LABEL: pr38477:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI5_3
-; GISEL-NEXT:    adrp x9, .LCPI5_0
+; GISEL-NEXT:    adrp x9, .LCPI5_4
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI5_3]
 ; GISEL-NEXT:    adrp x8, .LCPI5_2
-; GISEL-NEXT:    ldr q5, [x9, :lo12:.LCPI5_0]
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI5_2]
-; GISEL-NEXT:    adrp x8, .LCPI5_4
+; GISEL-NEXT:    adrp x8, .LCPI5_1
 ; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v3.8h, v2.8h
 ; GISEL-NEXT:    umull v2.4s, v3.4h, v2.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_4]
-; GISEL-NEXT:    adrp x8, .LCPI5_1
-; GISEL-NEXT:    cmeq v3.8h, v3.8h, v5.8h
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT:    adrp x8, .LCPI5_0
+; GISEL-NEXT:    neg v3.8h, v3.8h
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI5_1]
-; GISEL-NEXT:    shl v3.8h, v3.8h, #15
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI5_4]
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI5_0]
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT:    neg v2.8h, v4.8h
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    sshr v2.8h, v3.8h, #15
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v5.8h
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
Lines changed: 146 additions & 0 deletions (new MIR test file)

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
name:            sext_of_fcmp_v4s32
legalized:       true
liveins:
  - { reg: '$q0' }
  - { reg: '$x0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
body:             |
  bb.1:
    liveins: $q0, $x0, $x1, $x2

    ; CHECK-LABEL: name: sext_of_fcmp_v4s32
    ; CHECK: liveins: $q0, $x0, $x1, $x2
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
    ; CHECK-NEXT: $q0 = COPY [[FCMP]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %6:_(s32) = G_FCONSTANT float 6.000000e+00
    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
    %19:_(s32) = G_CONSTANT i32 31
    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
    $q0 = COPY %11(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            sext_of_icmp_v4s32
legalized:       true
liveins:
  - { reg: '$q0' }
  - { reg: '$x0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
body:             |
  bb.1:
    liveins: $q0, $x0, $x1, $x2

    ; CHECK-LABEL: name: sext_of_icmp_v4s32
    ; CHECK: liveins: $q0, $x0, $x1, $x2
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
    ; CHECK-NEXT: $q0 = COPY [[ICMP]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %6:_(s32) = G_CONSTANT i32 42
    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
    %17:_(<4 x s32>) = G_ICMP intpred(eq), %0(<4 x s32>), %5
    %19:_(s32) = G_CONSTANT i32 31
    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
    $q0 = COPY %11(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            sext_of_fcmp_wrong_shift
legalized:       true
liveins:
  - { reg: '$q0' }
  - { reg: '$x0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
body:             |
  bb.1:
    liveins: $q0, $x0, $x1, $x2

    ; CHECK-LABEL: name: sext_of_fcmp_wrong_shift
    ; CHECK: liveins: $q0, $x0, $x1, $x2
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>)
    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<4 x s32>)
    ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %6:_(s32) = G_FCONSTANT float 6.000000e+00
    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
    %19:_(s32) = G_CONSTANT i32 29
    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
    $q0 = COPY %11(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            sext_of_fcmp_mismatch_shift
legalized:       true
liveins:
  - { reg: '$q0' }
  - { reg: '$x0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
body:             |
  bb.1:
    liveins: $q0, $x0, $x1, $x2

    ; CHECK-LABEL: name: sext_of_fcmp_mismatch_shift
    ; CHECK: liveins: $q0, $x0, $x1, $x2
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
    ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 31
    ; CHECK-NEXT: %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32)
    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>)
    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], %cstv2(<4 x s32>)
    ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %6:_(s32) = G_FCONSTANT float 6.000000e+00
    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
    %19:_(s32) = G_CONSTANT i32 29
    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
    %cst2:_(s32) = G_CONSTANT i32 31
    %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32)
    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
    %11:_(<4 x s32>) = G_ASHR %18, %cstv2(<4 x s32>)
    $q0 = COPY %11(<4 x s32>)
    RET_ReallyLR implicit $q0

...
