|
6 | 6 | // RUN: -internal-isystem %S/Inputs/include \
|
7 | 7 | // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
|
8 | 8 | // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -o - \
|
9 |
| -// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefix=DEFAULT %s |
| 9 | +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=DEFAULT,CORRECT-DIV-SQRT %s |
10 | 10 |
|
11 | 11 | // Check that we end up with fast math flags set on intrinsic calls
|
12 | 12 | // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
|
|
17 | 17 | // RUN: -menable-no-nans -o - \
|
18 | 18 | // RUN: -D__HIPCC_RTC__ | FileCheck -check-prefix=FINITEONLY %s
|
19 | 19 |
|
| 20 | +// Check that f32 sqrt calls carry !fpmath metadata when correctly rounded divide/sqrt is disabled |
| 21 | +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ |
| 22 | +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ |
| 23 | +// RUN: -internal-isystem %S/Inputs/include \ |
| 24 | +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ |
| 25 | +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 \ |
| 26 | +// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt -o - \ |
| 27 | +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=DEFAULT,NO-CORRECT-DIV-SQRT %s |
| 28 | + |
20 | 29 | // DEFAULT-LABEL: @test_fma_f16(
|
21 | 30 | // DEFAULT-NEXT: entry:
|
22 | 31 | // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef half @llvm.fma.f16(half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]])
|
@@ -142,3 +151,36 @@ namespace user_namespace {
|
142 | 151 | fma(a, b, b);
|
143 | 152 | }
|
144 | 153 | }
|
| 154 | + |
| 155 | +// CORRECT-DIV-SQRT-LABEL: @test_sqrt_f32( |
| 156 | +// CORRECT-DIV-SQRT-NEXT: entry: |
| 157 | +// CORRECT-DIV-SQRT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]) |
| 158 | +// CORRECT-DIV-SQRT-NEXT: ret float [[TMP0]] |
| 159 | +// |
| 160 | +// FINITEONLY-LABEL: @test_sqrt_f32( |
| 161 | +// FINITEONLY-NEXT: entry: |
| 162 | +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) [[X:%.*]]) |
| 163 | +// FINITEONLY-NEXT: ret float [[TMP0]] |
| 164 | +// |
| 165 | +// NO-CORRECT-DIV-SQRT-LABEL: @test_sqrt_f32( |
| 166 | +// NO-CORRECT-DIV-SQRT-NEXT: entry: |
| 167 | +// NO-CORRECT-DIV-SQRT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META4:![0-9]+]] |
| 168 | +// NO-CORRECT-DIV-SQRT-NEXT: ret float [[TMP0]] |
| 169 | +// |
| 170 | +extern "C" __device__ float test_sqrt_f32(float x) { |
| 171 | + return sqrt(x); /* float argument: the checks above expect a call to @llvm.sqrt.f32, with !fpmath attached only in the run that disables correctly rounded divide/sqrt */ |
| 172 | +} |
| 173 | + |
| 174 | +// DEFAULT-LABEL: @test_sqrt_f64( |
| 175 | +// DEFAULT-NEXT: entry: |
| 176 | +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) |
| 177 | +// DEFAULT-NEXT: ret double [[TMP0]] |
| 178 | +// |
| 179 | +// FINITEONLY-LABEL: @test_sqrt_f64( |
| 180 | +// FINITEONLY-NEXT: entry: |
| 181 | +// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double nofpclass(nan inf) [[X:%.*]]) |
| 182 | +// FINITEONLY-NEXT: ret double [[TMP0]] |
| 183 | +// |
| 184 | +extern "C" __device__ double test_sqrt_f64(double x) { |
| 185 | + return sqrt(x); /* double argument: the checks above expect a call to @llvm.sqrt.f64; no !fpmath variant is checked for f64 */ |
| 186 | +} |
0 commit comments