Skip to content

Commit 07ccf65

Browse files
committed
[AArch64][GlobalISel] Enable vector support for G_SELECT->G_FMAXIMUM/MINIMUM.
Vector support seems to work immediately, as long as we run the combine before legalization (so the vector SELECTs don't get lowered) and the legalizer rules are there to enable generation. Differential Revision: https://reviews.llvm.org/D135047
1 parent 54608b4 commit 07ccf65

File tree

8 files changed

+475
-22
lines changed

8 files changed

+475
-22
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
10311031
form_bitfield_extract, constant_fold, fabs_fneg_fold,
10321032
intdiv_combines, mulh_combines, redundant_neg_operands,
10331033
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
1034-
sub_add_reg]>;
1034+
sub_add_reg, select_to_minmax]>;
10351035

10361036
// A combine group used for prelegalizer combiners at -O0. The combines in
10371037
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5985,8 +5985,7 @@ bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
59855985
// And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
59865986
LLT DstTy = MRI.getType(Dst);
59875987
// Bail out early on pointers, since we'll never want to fold to a min/max.
5988-
// TODO: Handle vectors.
5989-
if (DstTy.isPointer() || DstTy.isVector())
5988+
if (DstTy.isPointer())
59905989
return false;
59915990
// Match a floating point compare with a less-than/greater-than predicate.
59925991
// TODO: Allow multiple users of the compare if they are all selects.

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -803,10 +803,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
803803
.libcallFor({s128})
804804
.minScalar(0, MinFPScalar);
805805

806-
// TODO: Vector types.
807806
getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
808-
.legalFor({MinFPScalar, s32, s64})
809-
.minScalar(0, MinFPScalar);
807+
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
808+
.legalIf([=](const LegalityQuery &Query) {
809+
const auto &Ty = Query.Types[0];
810+
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
811+
})
812+
.minScalar(0, MinFPScalar)
813+
.clampNumElements(0, v4s16, v8s16)
814+
.clampNumElements(0, v2s32, v4s32)
815+
.clampNumElements(0, v2s64, v2s64);
810816

811817
// TODO: Libcall support for s128.
812818
// TODO: s16 should be legal with full FP16 support.

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@ body: |
99
bb.0:
1010
liveins: $h0, $h1
1111
; FP16-LABEL: name: s16_legal_with_full_fp16
12-
; FP16: %a:_(s16) = COPY $h0
12+
; FP16: liveins: $h0, $h1
13+
; FP16-NEXT: {{ $}}
14+
; FP16-NEXT: %a:_(s16) = COPY $h0
1315
; FP16-NEXT: %b:_(s16) = COPY $h1
1416
; FP16-NEXT: %legalize_me:_(s16) = G_FMAXIMUM %a, %b
1517
; FP16-NEXT: $h0 = COPY %legalize_me(s16)
1618
; FP16-NEXT: RET_ReallyLR implicit $h0
1719
; NO-FP16-LABEL: name: s16_legal_with_full_fp16
18-
; NO-FP16: %a:_(s16) = COPY $h0
20+
; NO-FP16: liveins: $h0, $h1
21+
; NO-FP16-NEXT: {{ $}}
22+
; NO-FP16-NEXT: %a:_(s16) = COPY $h0
1923
; NO-FP16-NEXT: %b:_(s16) = COPY $h1
2024
; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
2125
; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body: |
3741
bb.0:
3842
liveins: $s0, $s1
3943
; FP16-LABEL: name: s32_legal
40-
; FP16: %a:_(s32) = COPY $s0
44+
; FP16: liveins: $s0, $s1
45+
; FP16-NEXT: {{ $}}
46+
; FP16-NEXT: %a:_(s32) = COPY $s0
4147
; FP16-NEXT: %b:_(s32) = COPY $s1
4248
; FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
4349
; FP16-NEXT: $s0 = COPY %legalize_me(s32)
4450
; FP16-NEXT: RET_ReallyLR implicit $s0
4551
; NO-FP16-LABEL: name: s32_legal
46-
; NO-FP16: %a:_(s32) = COPY $s0
52+
; NO-FP16: liveins: $s0, $s1
53+
; NO-FP16-NEXT: {{ $}}
54+
; NO-FP16-NEXT: %a:_(s32) = COPY $s0
4755
; NO-FP16-NEXT: %b:_(s32) = COPY $s1
4856
; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
4957
; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body: |
6270
bb.0:
6371
liveins: $d0, $d1
6472
; FP16-LABEL: name: s64_legal
65-
; FP16: %a:_(s64) = COPY $d0
73+
; FP16: liveins: $d0, $d1
74+
; FP16-NEXT: {{ $}}
75+
; FP16-NEXT: %a:_(s64) = COPY $d0
6676
; FP16-NEXT: %b:_(s64) = COPY $d1
6777
; FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
6878
; FP16-NEXT: $d0 = COPY %legalize_me(s64)
6979
; FP16-NEXT: RET_ReallyLR implicit $d0
7080
; NO-FP16-LABEL: name: s64_legal
71-
; NO-FP16: %a:_(s64) = COPY $d0
81+
; NO-FP16: liveins: $d0, $d1
82+
; NO-FP16-NEXT: {{ $}}
83+
; NO-FP16-NEXT: %a:_(s64) = COPY $d0
7284
; NO-FP16-NEXT: %b:_(s64) = COPY $d1
7385
; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
7486
; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,62 @@ body: |
7890
%legalize_me:_(s64) = G_FMAXIMUM %a, %b
7991
$d0 = COPY %legalize_me(s64)
8092
RET_ReallyLR implicit $d0
93+
...
94+
---
95+
name: v2s32
96+
alignment: 4
97+
body: |
98+
bb.0:
99+
liveins: $d0, $d1
100+
; FP16-LABEL: name: v2s32
101+
; FP16: liveins: $d0, $d1
102+
; FP16-NEXT: {{ $}}
103+
; FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
104+
; FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
105+
; FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
106+
; FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
107+
; FP16-NEXT: RET_ReallyLR implicit $d0
108+
; NO-FP16-LABEL: name: v2s32
109+
; NO-FP16: liveins: $d0, $d1
110+
; NO-FP16-NEXT: {{ $}}
111+
; NO-FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
112+
; NO-FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
113+
; NO-FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
114+
; NO-FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
115+
; NO-FP16-NEXT: RET_ReallyLR implicit $d0
116+
%a:_(<2 x s32>) = COPY $d0
117+
%b:_(<2 x s32>) = COPY $d1
118+
%maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
119+
$d0 = COPY %maximum(<2 x s32>)
120+
RET_ReallyLR implicit $d0
121+
122+
...
123+
---
124+
name: v4s32
125+
alignment: 4
126+
body: |
127+
bb.0:
128+
liveins: $q0, $q1
129+
; FP16-LABEL: name: v4s32
130+
; FP16: liveins: $q0, $q1
131+
; FP16-NEXT: {{ $}}
132+
; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
133+
; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
134+
; FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
135+
; FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
136+
; FP16-NEXT: RET_ReallyLR implicit $q0
137+
; NO-FP16-LABEL: name: v4s32
138+
; NO-FP16: liveins: $q0, $q1
139+
; NO-FP16-NEXT: {{ $}}
140+
; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
141+
; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
142+
; NO-FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
143+
; NO-FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
144+
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
145+
%a:_(<4 x s32>) = COPY $q0
146+
%b:_(<4 x s32>) = COPY $q1
147+
%maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
148+
$q0 = COPY %maximum(<4 x s32>)
149+
RET_ReallyLR implicit $q0
150+
151+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@ body: |
99
bb.0:
1010
liveins: $h0, $h1
1111
; FP16-LABEL: name: s16_legal_with_full_fp16
12-
; FP16: %a:_(s16) = COPY $h0
12+
; FP16: liveins: $h0, $h1
13+
; FP16-NEXT: {{ $}}
14+
; FP16-NEXT: %a:_(s16) = COPY $h0
1315
; FP16-NEXT: %b:_(s16) = COPY $h1
1416
; FP16-NEXT: %legalize_me:_(s16) = G_FMINIMUM %a, %b
1517
; FP16-NEXT: $h0 = COPY %legalize_me(s16)
1618
; FP16-NEXT: RET_ReallyLR implicit $h0
1719
; NO-FP16-LABEL: name: s16_legal_with_full_fp16
18-
; NO-FP16: %a:_(s16) = COPY $h0
20+
; NO-FP16: liveins: $h0, $h1
21+
; NO-FP16-NEXT: {{ $}}
22+
; NO-FP16-NEXT: %a:_(s16) = COPY $h0
1923
; NO-FP16-NEXT: %b:_(s16) = COPY $h1
2024
; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
2125
; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body: |
3741
bb.0:
3842
liveins: $s0, $s1
3943
; FP16-LABEL: name: s32_legal
40-
; FP16: %a:_(s32) = COPY $s0
44+
; FP16: liveins: $s0, $s1
45+
; FP16-NEXT: {{ $}}
46+
; FP16-NEXT: %a:_(s32) = COPY $s0
4147
; FP16-NEXT: %b:_(s32) = COPY $s1
4248
; FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
4349
; FP16-NEXT: $s0 = COPY %legalize_me(s32)
4450
; FP16-NEXT: RET_ReallyLR implicit $s0
4551
; NO-FP16-LABEL: name: s32_legal
46-
; NO-FP16: %a:_(s32) = COPY $s0
52+
; NO-FP16: liveins: $s0, $s1
53+
; NO-FP16-NEXT: {{ $}}
54+
; NO-FP16-NEXT: %a:_(s32) = COPY $s0
4755
; NO-FP16-NEXT: %b:_(s32) = COPY $s1
4856
; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
4957
; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body: |
6270
bb.0:
6371
liveins: $d0, $d1
6472
; FP16-LABEL: name: s64_legal
65-
; FP16: %a:_(s64) = COPY $d0
73+
; FP16: liveins: $d0, $d1
74+
; FP16-NEXT: {{ $}}
75+
; FP16-NEXT: %a:_(s64) = COPY $d0
6676
; FP16-NEXT: %b:_(s64) = COPY $d1
6777
; FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
6878
; FP16-NEXT: $d0 = COPY %legalize_me(s64)
6979
; FP16-NEXT: RET_ReallyLR implicit $d0
7080
; NO-FP16-LABEL: name: s64_legal
71-
; NO-FP16: %a:_(s64) = COPY $d0
81+
; NO-FP16: liveins: $d0, $d1
82+
; NO-FP16-NEXT: {{ $}}
83+
; NO-FP16-NEXT: %a:_(s64) = COPY $d0
7284
; NO-FP16-NEXT: %b:_(s64) = COPY $d1
7385
; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
7486
; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,77 @@ body: |
7890
%legalize_me:_(s64) = G_FMINIMUM %a, %b
7991
$d0 = COPY %legalize_me(s64)
8092
RET_ReallyLR implicit $d0
93+
...
94+
---
95+
name: v4s32
96+
alignment: 4
97+
body: |
98+
bb.0:
99+
liveins: $q0, $q1
100+
; FP16-LABEL: name: v4s32
101+
; FP16: liveins: $q0, $q1
102+
; FP16-NEXT: {{ $}}
103+
; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
104+
; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
105+
; FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
106+
; FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
107+
; FP16-NEXT: RET_ReallyLR implicit $q0
108+
; NO-FP16-LABEL: name: v4s32
109+
; NO-FP16: liveins: $q0, $q1
110+
; NO-FP16-NEXT: {{ $}}
111+
; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
112+
; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
113+
; NO-FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
114+
; NO-FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
115+
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
116+
%a:_(<4 x s32>) = COPY $q0
117+
%b:_(<4 x s32>) = COPY $q1
118+
%minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
119+
$q0 = COPY %minimum(<4 x s32>)
120+
RET_ReallyLR implicit $q0
121+
122+
...
123+
124+
---
125+
name: v8s32
126+
alignment: 4
127+
body: |
128+
bb.0:
129+
liveins: $q0, $q1, $q2, $q3
130+
; FP16-LABEL: name: v8s32
131+
; FP16: liveins: $q0, $q1, $q2, $q3
132+
; FP16-NEXT: {{ $}}
133+
; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
134+
; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
135+
; FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
136+
; FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
137+
; FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
138+
; FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
139+
; FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
140+
; FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
141+
; FP16-NEXT: RET_ReallyLR implicit $q0
142+
; NO-FP16-LABEL: name: v8s32
143+
; NO-FP16: liveins: $q0, $q1, $q2, $q3
144+
; NO-FP16-NEXT: {{ $}}
145+
; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
146+
; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
147+
; NO-FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
148+
; NO-FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
149+
; NO-FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
150+
; NO-FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
151+
; NO-FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
152+
; NO-FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
153+
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
154+
%a:_(<4 x s32>) = COPY $q0
155+
%b:_(<4 x s32>) = COPY $q1
156+
%c:_(<4 x s32>) = COPY $q2
157+
%d:_(<4 x s32>) = COPY $q3
158+
%v1:_(<8 x s32>) = G_CONCAT_VECTORS %a, %b
159+
%v2:_(<8 x s32>) = G_CONCAT_VECTORS %c, %d
160+
%minimum:_(<8 x s32>) = G_FMINIMUM %v1, %v2
161+
%uv1:_(<4 x s32>), %uv2:_(<4 x s32>) = G_UNMERGE_VALUES %minimum
162+
$q0 = COPY %uv1(<4 x s32>)
163+
$q1 = COPY %uv2(<4 x s32>)
164+
RET_ReallyLR implicit $q0
165+
166+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -510,11 +510,11 @@
510510
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
511511
# DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index
512512
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
513-
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
514-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
513+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
514+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
515515
# DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index
516-
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
517-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
516+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
517+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
518518
# DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
519519
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
520520
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK

0 commit comments

Comments
 (0)