Skip to content

Commit 3e7e23d

Browse files
authored
clang/HIP: Add tests that show fpmath metadata ends up on sqrt calls (llvm#136413)
Make sure the builtin header sqrts work with -fno-hip-fp32-correctly-rounded-divide-sqrt, and that we end up with properly annotated sqrt intrinsic call sites.
1 parent 1143a04 commit 3e7e23d

File tree

2 files changed

+1482
-1
lines changed

2 files changed

+1482
-1
lines changed

clang/test/Headers/__clang_hip_cmath.hip

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// RUN: -internal-isystem %S/Inputs/include \
77
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
88
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -o - \
9-
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefix=DEFAULT %s
9+
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=DEFAULT,CORRECT-DIV-SQRT %s
1010

1111
// Check that we end up with fast math flags set on intrinsic calls
1212
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
@@ -17,6 +17,15 @@
1717
// RUN: -menable-no-nans -o - \
1818
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefix=FINITEONLY %s
1919

20+
// Check that we end up with fpmath metadata set on sqrt calls
21+
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
22+
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
23+
// RUN: -internal-isystem %S/Inputs/include \
24+
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
25+
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 \
26+
// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt -o - \
27+
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=DEFAULT,NO-CORRECT-DIV-SQRT %s
28+
2029
// DEFAULT-LABEL: @test_fma_f16(
2130
// DEFAULT-NEXT: entry:
2231
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef half @llvm.fma.f16(half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]])
@@ -142,3 +151,36 @@ namespace user_namespace {
142151
fma(a, b, b);
143152
}
144153
}
154+
155+
// CORRECT-DIV-SQRT-LABEL: @test_sqrt_f32(
156+
// CORRECT-DIV-SQRT-NEXT: entry:
157+
// CORRECT-DIV-SQRT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]])
158+
// CORRECT-DIV-SQRT-NEXT: ret float [[TMP0]]
159+
//
160+
// FINITEONLY-LABEL: @test_sqrt_f32(
161+
// FINITEONLY-NEXT: entry:
162+
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X:%.*]])
163+
// FINITEONLY-NEXT: ret float [[TMP0]]
164+
//
165+
// NO-CORRECT-DIV-SQRT-LABEL: @test_sqrt_f32(
166+
// NO-CORRECT-DIV-SQRT-NEXT: entry:
167+
// NO-CORRECT-DIV-SQRT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META4:![0-9]+]]
168+
// NO-CORRECT-DIV-SQRT-NEXT: ret float [[TMP0]]
169+
//
170+
// Device function that passes its float argument to an unqualified sqrt()
// call and returns the result. It is declared extern "C" so the emitted
// symbol is unmangled and can be matched by name in the checks above, which
// expect a single llvm.sqrt.f32 intrinsic call for every RUN configuration.
extern "C" __device__ float test_sqrt_f32(float x) {
171+
return sqrt(x);
172+
}
173+
174+
// DEFAULT-LABEL: @test_sqrt_f64(
175+
// DEFAULT-NEXT: entry:
176+
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]])
177+
// DEFAULT-NEXT: ret double [[TMP0]]
178+
//
179+
// FINITEONLY-LABEL: @test_sqrt_f64(
180+
// FINITEONLY-NEXT: entry:
181+
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]])
182+
// FINITEONLY-NEXT: ret double [[TMP0]]
183+
//
184+
// Double-precision counterpart of the float sqrt test: passes its double
// argument to an unqualified sqrt() call and returns the result. It is
// declared extern "C" so the emitted symbol is unmangled; the checks above
// expect a single llvm.sqrt.f64 intrinsic call with no !fpmath annotation.
extern "C" __device__ double test_sqrt_f64(double x) {
185+
return sqrt(x);
186+
}

0 commit comments

Comments
 (0)