File tree Expand file tree Collapse file tree 3 files changed +41
-4
lines changed Expand file tree Collapse file tree 3 files changed +41
-4
lines changed Original file line number Diff line number Diff line change @@ -261,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering {
261
261
return true ;
262
262
}
263
263
264
+ bool isFAbsFree (EVT VT) const override { return true ; }
265
+ bool isFNegFree (EVT VT) const override { return true ; }
266
+
264
267
private:
265
268
const NVPTXSubtarget &STI; // cache the subtarget here
266
269
SDValue getParamSymbol (SelectionDAG &DAG, int idx, EVT) const ;
Original file line number Diff line number Diff line change @@ -182,8 +182,8 @@ define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 {
182
182
; CHECK-NEXT: .reg .b32 %r<3>;
183
183
; CHECK-EMPTY:
184
184
; CHECK-NEXT: // %bb.0:
185
- ; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0];
186
- ; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880 ;
185
+ ; CHECK-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
186
+ ; CHECK-NEXT: neg.bf16x2 %r2, %r1;
187
187
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
188
188
; CHECK-NEXT: ret;
189
189
%r = fneg <2 x bfloat> %a
@@ -532,8 +532,8 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
532
532
; CHECK-NEXT: .reg .b32 %r<3>;
533
533
; CHECK-EMPTY:
534
534
; CHECK-NEXT: // %bb.0:
535
- ; CHECK-NEXT: ld.param.u32 %r1, [test_fabs_param_0];
536
- ; CHECK-NEXT: and.b32 %r2, %r1, 2147450879 ;
535
+ ; CHECK-NEXT: ld.param.b32 %r1, [test_fabs_param_0];
536
+ ; CHECK-NEXT: abs.bf16x2 %r2, %r1;
537
537
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
538
538
; CHECK-NEXT: ret;
539
539
%r = call <2 x bfloat> @llvm.fabs.f16 (<2 x bfloat> %a )
Original file line number Diff line number Diff line change
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
3
+ ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
4
+ target triple = "nvptx64-nvidia-cuda"
5
+
6
+ define float @fabs_free (i32 %in ) {
7
+ ; CHECK-LABEL: fabs_free(
8
+ ; CHECK: {
9
+ ; CHECK-NEXT: .reg .f32 %f<3>;
10
+ ; CHECK-EMPTY:
11
+ ; CHECK-NEXT: // %bb.0:
12
+ ; CHECK-NEXT: ld.param.f32 %f1, [fabs_free_param_0];
13
+ ; CHECK-NEXT: abs.f32 %f2, %f1;
14
+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
15
+ ; CHECK-NEXT: ret;
16
+ %b = bitcast i32 %in to float
17
+ %f = call float @llvm.fabs.f32 (float %b )
18
+ ret float %f
19
+ }
20
+
21
+ define float @fneg_free (i32 %in ) {
22
+ ; CHECK-LABEL: fneg_free(
23
+ ; CHECK: {
24
+ ; CHECK-NEXT: .reg .f32 %f<3>;
25
+ ; CHECK-EMPTY:
26
+ ; CHECK-NEXT: // %bb.0:
27
+ ; CHECK-NEXT: ld.param.f32 %f1, [fneg_free_param_0];
28
+ ; CHECK-NEXT: neg.f32 %f2, %f1;
29
+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
30
+ ; CHECK-NEXT: ret;
31
+ %b = bitcast i32 %in to float
32
+ %f = fneg float %b
33
+ ret float %f
34
+ }
You can’t perform that action at this time.
0 commit comments