Skip to content

Commit 6676027

Browse files
committed
[RISCV] Implement support for bf16 select when zfbfmin is enabled
These test cases previously caused an error. RISCVInstrInfo::copyPhysReg also needed a tweak in order to account for copying bf16 values in FPR16 registers. Differential Revision: https://reviews.llvm.org/D156883
1 parent 2e65a42 commit 6676027

File tree

4 files changed

+538
-2
lines changed

4 files changed

+538
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
376376
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
377377
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
378378
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
379+
setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
380+
setOperationAction(ISD::SELECT, MVT::bf16, Promote);
381+
setOperationAction(ISD::SETCC, MVT::bf16, Promote);
379382
}
380383

381384
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,10 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
330330
if (STI.hasStdExtZfh()) {
331331
Opc = RISCV::FSGNJ_H;
332332
} else {
333-
assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() &&
333+
assert(STI.hasStdExtF() &&
334+
(STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
334335
"Unexpected extensions");
335-
// Zfhmin subset doesn't have FSGNJ_H, replaces FSGNJ_H with FSGNJ_S.
336+
// Zfhmin and Zfbfmin don't have FSGNJ_H, so use FSGNJ_S instead.
336337
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
337338
&RISCV::FPR32RegClass);
338339
SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
3+
; RUN: -target-abi ilp32f < %s | FileCheck %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
5+
; RUN: -target-abi lp64f < %s | FileCheck %s
6+
7+
define bfloat @select_fcmp_false(bfloat %a, bfloat %b) nounwind {
8+
; CHECK-LABEL: select_fcmp_false:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: fmv.s fa0, fa1
11+
; CHECK-NEXT: ret
12+
%1 = fcmp false bfloat %a, %b
13+
%2 = select i1 %1, bfloat %a, bfloat %b
14+
ret bfloat %2
15+
}
16+
17+
define bfloat @select_fcmp_oeq(bfloat %a, bfloat %b) nounwind {
18+
; CHECK-LABEL: select_fcmp_oeq:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
21+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
22+
; CHECK-NEXT: feq.s a0, fa5, fa4
23+
; CHECK-NEXT: bnez a0, .LBB1_2
24+
; CHECK-NEXT: # %bb.1:
25+
; CHECK-NEXT: fmv.s fa5, fa4
26+
; CHECK-NEXT: .LBB1_2:
27+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
28+
; CHECK-NEXT: ret
29+
%1 = fcmp oeq bfloat %a, %b
30+
%2 = select i1 %1, bfloat %a, bfloat %b
31+
ret bfloat %2
32+
}
33+
34+
define bfloat @select_fcmp_ogt(bfloat %a, bfloat %b) nounwind {
35+
; CHECK-LABEL: select_fcmp_ogt:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
38+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
39+
; CHECK-NEXT: flt.s a0, fa4, fa5
40+
; CHECK-NEXT: bnez a0, .LBB2_2
41+
; CHECK-NEXT: # %bb.1:
42+
; CHECK-NEXT: fmv.s fa5, fa4
43+
; CHECK-NEXT: .LBB2_2:
44+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
45+
; CHECK-NEXT: ret
46+
%1 = fcmp ogt bfloat %a, %b
47+
%2 = select i1 %1, bfloat %a, bfloat %b
48+
ret bfloat %2
49+
}
50+
51+
define bfloat @select_fcmp_oge(bfloat %a, bfloat %b) nounwind {
52+
; CHECK-LABEL: select_fcmp_oge:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
55+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
56+
; CHECK-NEXT: fle.s a0, fa4, fa5
57+
; CHECK-NEXT: bnez a0, .LBB3_2
58+
; CHECK-NEXT: # %bb.1:
59+
; CHECK-NEXT: fmv.s fa5, fa4
60+
; CHECK-NEXT: .LBB3_2:
61+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
62+
; CHECK-NEXT: ret
63+
%1 = fcmp oge bfloat %a, %b
64+
%2 = select i1 %1, bfloat %a, bfloat %b
65+
ret bfloat %2
66+
}
67+
68+
define bfloat @select_fcmp_olt(bfloat %a, bfloat %b) nounwind {
69+
; CHECK-LABEL: select_fcmp_olt:
70+
; CHECK: # %bb.0:
71+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
72+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
73+
; CHECK-NEXT: flt.s a0, fa5, fa4
74+
; CHECK-NEXT: bnez a0, .LBB4_2
75+
; CHECK-NEXT: # %bb.1:
76+
; CHECK-NEXT: fmv.s fa5, fa4
77+
; CHECK-NEXT: .LBB4_2:
78+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
79+
; CHECK-NEXT: ret
80+
%1 = fcmp olt bfloat %a, %b
81+
%2 = select i1 %1, bfloat %a, bfloat %b
82+
ret bfloat %2
83+
}
84+
85+
define bfloat @select_fcmp_ole(bfloat %a, bfloat %b) nounwind {
86+
; CHECK-LABEL: select_fcmp_ole:
87+
; CHECK: # %bb.0:
88+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
89+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
90+
; CHECK-NEXT: fle.s a0, fa5, fa4
91+
; CHECK-NEXT: bnez a0, .LBB5_2
92+
; CHECK-NEXT: # %bb.1:
93+
; CHECK-NEXT: fmv.s fa5, fa4
94+
; CHECK-NEXT: .LBB5_2:
95+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
96+
; CHECK-NEXT: ret
97+
%1 = fcmp ole bfloat %a, %b
98+
%2 = select i1 %1, bfloat %a, bfloat %b
99+
ret bfloat %2
100+
}
101+
102+
define bfloat @select_fcmp_one(bfloat %a, bfloat %b) nounwind {
103+
; CHECK-LABEL: select_fcmp_one:
104+
; CHECK: # %bb.0:
105+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
106+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
107+
; CHECK-NEXT: flt.s a0, fa5, fa4
108+
; CHECK-NEXT: flt.s a1, fa4, fa5
109+
; CHECK-NEXT: or a0, a1, a0
110+
; CHECK-NEXT: bnez a0, .LBB6_2
111+
; CHECK-NEXT: # %bb.1:
112+
; CHECK-NEXT: fmv.s fa5, fa4
113+
; CHECK-NEXT: .LBB6_2:
114+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
115+
; CHECK-NEXT: ret
116+
%1 = fcmp one bfloat %a, %b
117+
%2 = select i1 %1, bfloat %a, bfloat %b
118+
ret bfloat %2
119+
}
120+
121+
define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind {
122+
; CHECK-LABEL: select_fcmp_ord:
123+
; CHECK: # %bb.0:
124+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
125+
; CHECK-NEXT: feq.s a0, fa5, fa5
126+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
127+
; CHECK-NEXT: feq.s a1, fa4, fa4
128+
; CHECK-NEXT: and a0, a1, a0
129+
; CHECK-NEXT: bnez a0, .LBB7_2
130+
; CHECK-NEXT: # %bb.1:
131+
; CHECK-NEXT: fmv.s fa4, fa5
132+
; CHECK-NEXT: .LBB7_2:
133+
; CHECK-NEXT: fcvt.bf16.s fa0, fa4
134+
; CHECK-NEXT: ret
135+
%1 = fcmp ord bfloat %a, %b
136+
%2 = select i1 %1, bfloat %a, bfloat %b
137+
ret bfloat %2
138+
}
139+
140+
define bfloat @select_fcmp_ueq(bfloat %a, bfloat %b) nounwind {
141+
; CHECK-LABEL: select_fcmp_ueq:
142+
; CHECK: # %bb.0:
143+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
144+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
145+
; CHECK-NEXT: flt.s a0, fa5, fa4
146+
; CHECK-NEXT: flt.s a1, fa4, fa5
147+
; CHECK-NEXT: or a0, a1, a0
148+
; CHECK-NEXT: beqz a0, .LBB8_2
149+
; CHECK-NEXT: # %bb.1:
150+
; CHECK-NEXT: fmv.s fa5, fa4
151+
; CHECK-NEXT: .LBB8_2:
152+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
153+
; CHECK-NEXT: ret
154+
%1 = fcmp ueq bfloat %a, %b
155+
%2 = select i1 %1, bfloat %a, bfloat %b
156+
ret bfloat %2
157+
}
158+
159+
define bfloat @select_fcmp_ugt(bfloat %a, bfloat %b) nounwind {
160+
; CHECK-LABEL: select_fcmp_ugt:
161+
; CHECK: # %bb.0:
162+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
163+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
164+
; CHECK-NEXT: fle.s a0, fa5, fa4
165+
; CHECK-NEXT: beqz a0, .LBB9_2
166+
; CHECK-NEXT: # %bb.1:
167+
; CHECK-NEXT: fmv.s fa5, fa4
168+
; CHECK-NEXT: .LBB9_2:
169+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
170+
; CHECK-NEXT: ret
171+
%1 = fcmp ugt bfloat %a, %b
172+
%2 = select i1 %1, bfloat %a, bfloat %b
173+
ret bfloat %2
174+
}
175+
176+
define bfloat @select_fcmp_uge(bfloat %a, bfloat %b) nounwind {
177+
; CHECK-LABEL: select_fcmp_uge:
178+
; CHECK: # %bb.0:
179+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
180+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
181+
; CHECK-NEXT: flt.s a0, fa5, fa4
182+
; CHECK-NEXT: beqz a0, .LBB10_2
183+
; CHECK-NEXT: # %bb.1:
184+
; CHECK-NEXT: fmv.s fa5, fa4
185+
; CHECK-NEXT: .LBB10_2:
186+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
187+
; CHECK-NEXT: ret
188+
%1 = fcmp uge bfloat %a, %b
189+
%2 = select i1 %1, bfloat %a, bfloat %b
190+
ret bfloat %2
191+
}
192+
193+
define bfloat @select_fcmp_ult(bfloat %a, bfloat %b) nounwind {
194+
; CHECK-LABEL: select_fcmp_ult:
195+
; CHECK: # %bb.0:
196+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
197+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
198+
; CHECK-NEXT: fle.s a0, fa4, fa5
199+
; CHECK-NEXT: beqz a0, .LBB11_2
200+
; CHECK-NEXT: # %bb.1:
201+
; CHECK-NEXT: fmv.s fa5, fa4
202+
; CHECK-NEXT: .LBB11_2:
203+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
204+
; CHECK-NEXT: ret
205+
%1 = fcmp ult bfloat %a, %b
206+
%2 = select i1 %1, bfloat %a, bfloat %b
207+
ret bfloat %2
208+
}
209+
210+
define bfloat @select_fcmp_ule(bfloat %a, bfloat %b) nounwind {
211+
; CHECK-LABEL: select_fcmp_ule:
212+
; CHECK: # %bb.0:
213+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
214+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
215+
; CHECK-NEXT: flt.s a0, fa4, fa5
216+
; CHECK-NEXT: beqz a0, .LBB12_2
217+
; CHECK-NEXT: # %bb.1:
218+
; CHECK-NEXT: fmv.s fa5, fa4
219+
; CHECK-NEXT: .LBB12_2:
220+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
221+
; CHECK-NEXT: ret
222+
%1 = fcmp ule bfloat %a, %b
223+
%2 = select i1 %1, bfloat %a, bfloat %b
224+
ret bfloat %2
225+
}
226+
227+
define bfloat @select_fcmp_une(bfloat %a, bfloat %b) nounwind {
228+
; CHECK-LABEL: select_fcmp_une:
229+
; CHECK: # %bb.0:
230+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
231+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
232+
; CHECK-NEXT: feq.s a0, fa5, fa4
233+
; CHECK-NEXT: beqz a0, .LBB13_2
234+
; CHECK-NEXT: # %bb.1:
235+
; CHECK-NEXT: fmv.s fa5, fa4
236+
; CHECK-NEXT: .LBB13_2:
237+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
238+
; CHECK-NEXT: ret
239+
%1 = fcmp une bfloat %a, %b
240+
%2 = select i1 %1, bfloat %a, bfloat %b
241+
ret bfloat %2
242+
}
243+
244+
define bfloat @select_fcmp_uno(bfloat %a, bfloat %b) nounwind {
245+
; CHECK-LABEL: select_fcmp_uno:
246+
; CHECK: # %bb.0:
247+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
248+
; CHECK-NEXT: feq.s a0, fa5, fa5
249+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
250+
; CHECK-NEXT: feq.s a1, fa4, fa4
251+
; CHECK-NEXT: and a0, a1, a0
252+
; CHECK-NEXT: beqz a0, .LBB14_2
253+
; CHECK-NEXT: # %bb.1:
254+
; CHECK-NEXT: fmv.s fa4, fa5
255+
; CHECK-NEXT: .LBB14_2:
256+
; CHECK-NEXT: fcvt.bf16.s fa0, fa4
257+
; CHECK-NEXT: ret
258+
%1 = fcmp uno bfloat %a, %b
259+
%2 = select i1 %1, bfloat %a, bfloat %b
260+
ret bfloat %2
261+
}
262+
263+
define bfloat @select_fcmp_true(bfloat %a, bfloat %b) nounwind {
264+
; CHECK-LABEL: select_fcmp_true:
265+
; CHECK: # %bb.0:
266+
; CHECK-NEXT: ret
267+
%1 = fcmp true bfloat %a, %b
268+
%2 = select i1 %1, bfloat %a, bfloat %b
269+
ret bfloat %2
270+
}
271+
272+
; Ensure that ISel succeeds for a select+fcmp that has an i32 result type.
273+
define i32 @i32_select_fcmp_oeq(bfloat %a, bfloat %b, i32 %c, i32 %d) nounwind {
274+
; CHECK-LABEL: i32_select_fcmp_oeq:
275+
; CHECK: # %bb.0:
276+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
277+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
278+
; CHECK-NEXT: feq.s a2, fa4, fa5
279+
; CHECK-NEXT: bnez a2, .LBB16_2
280+
; CHECK-NEXT: # %bb.1:
281+
; CHECK-NEXT: mv a0, a1
282+
; CHECK-NEXT: .LBB16_2:
283+
; CHECK-NEXT: ret
284+
%1 = fcmp oeq bfloat %a, %b
285+
%2 = select i1 %1, i32 %c, i32 %d
286+
ret i32 %2
287+
}
288+
289+
; Select between the constants 1 and 2 on an ordered-equal bf16 compare.
; NOTE: unlike the neighbouring cases, the fcmp here carries the `fast` flag
; and the function has no `nounwind` attribute.
; The expected code needs no branch: it computes 2 - feq(a, b).
define i32 @select_fcmp_oeq_1_2(bfloat %a, bfloat %b) {
290+
; CHECK-LABEL: select_fcmp_oeq_1_2:
291+
; CHECK: # %bb.0:
292+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
293+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
294+
; CHECK-NEXT: feq.s a0, fa4, fa5
295+
; CHECK-NEXT: li a1, 2
296+
; CHECK-NEXT: sub a0, a1, a0
297+
; CHECK-NEXT: ret
298+
%1 = fcmp fast oeq bfloat %a, %b
299+
%2 = select i1 %1, i32 1, i32 2
300+
ret i32 %2
301+
}
302+
303+
; Select between -1 and 0 on an unordered-or-greater-than bf16 compare.
; NOTE(review): the function name says `uge`, but the predicate tested below
; is `ugt`. The expected code matches `ugt`: it computes fle(a, b) - 1, which
; is 0 when a <= b and -1 otherwise.
define signext i32 @select_fcmp_uge_negone_zero(bfloat %a, bfloat %b) nounwind {
304+
; CHECK-LABEL: select_fcmp_uge_negone_zero:
305+
; CHECK: # %bb.0:
306+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
307+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
308+
; CHECK-NEXT: fle.s a0, fa4, fa5
309+
; CHECK-NEXT: addi a0, a0, -1
310+
; CHECK-NEXT: ret
311+
%1 = fcmp ugt bfloat %a, %b
312+
%2 = select i1 %1, i32 -1, i32 0
313+
ret i32 %2
314+
}
315+
316+
; Select between the constants 1 and 2 on an unordered-or-greater-than bf16
; compare.
; NOTE(review): the function name says `uge`, but the predicate tested below
; is `ugt`. The expected code matches `ugt`: it computes fle(a, b) + 1, which
; is 2 when a <= b and 1 otherwise.
define signext i32 @select_fcmp_uge_1_2(bfloat %a, bfloat %b) nounwind {
317+
; CHECK-LABEL: select_fcmp_uge_1_2:
318+
; CHECK: # %bb.0:
319+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
320+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
321+
; CHECK-NEXT: fle.s a0, fa4, fa5
322+
; CHECK-NEXT: addi a0, a0, 1
323+
; CHECK-NEXT: ret
324+
%1 = fcmp ugt bfloat %a, %b
325+
%2 = select i1 %1, i32 1, i32 2
326+
ret i32 %2
327+
}

0 commit comments

Comments
 (0)