30
30
// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
31
31
// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW
32
32
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
33
+ // RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
34
+ // RUN: | FileCheck %s -allow-deprecated-dag-overlap \
35
+ // RUN: --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-NEW,LNX_17,NORDC17
36
+ // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
33
37
// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
34
38
// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
35
- // RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW,LNX_17
39
+ // RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW,LNX_17,RDC17
36
40
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
37
41
// RUN: -target-sdk-version=9.2 -o - \
38
42
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
45
49
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
46
50
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
47
51
// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
48
- // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC ,HIP,HIPEF
52
+ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,RDC,HIP,HIPEF
49
53
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
50
54
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,LNX,NORDC,HIP,HIPNEF
51
55
56
60
#include "Inputs/cuda.h"
57
61
58
62
#ifndef NOGLOBALS
59
- // LNX-DAG: @device_var = internal global i32
63
+ // NORDC-DAG: @device_var = internal global i32
64
+ // RDC-DAG: @device_var = dso_local global i32
60
65
// WIN-DAG: @"?device_var@@3HA" = internal global i32
61
66
__device__ int device_var;
62
67
63
- // LNX-DAG: @constant_var = internal global i32
68
+ // NORDC-DAG: @constant_var = internal global i32
69
+ // RDC-DAG: @constant_var = dso_local global i32
64
70
// WIN-DAG: @"?constant_var@@3HA" = internal global i32
65
71
__constant__ int constant_var;
66
72
67
- // LNX-DAG: @shared_var = internal global i32
73
+ // NORDC-DAG: @shared_var = internal global i32
74
+ // RDC-DAG: @shared_var = dso_local global i32
68
75
// WIN-DAG: @"?shared_var@@3HA" = internal global i32
69
76
__shared__ int shared_var;
70
77
@@ -87,18 +94,21 @@ extern __constant__ int ext_constant_var;
87
94
88
95
// external device-side variables with definitions should generate
89
96
// definitions for the shadows.
90
- // LNX-DAG: @ext_device_var_def = internal global i32 undef,
97
+ // NORDC-DAG: @ext_device_var_def = internal global i32 undef,
98
+ // RDC-DAG: @ext_device_var_def = dso_local global i32 undef,
91
99
// WIN-DAG: @"?ext_device_var_def@@3HA" = internal global i32 undef
92
100
extern __device__ int ext_device_var_def;
93
101
__device__ int ext_device_var_def = 1 ;
94
- // LNX-DAG: @ext_device_var_def = internal global i32 undef,
102
+ // NORDC-DAG: @ext_constant_var_def = internal global i32 undef,
103
+ // RDC-DAG: @ext_constant_var_def = dso_local global i32 undef,
95
104
// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
96
105
__constant__ int ext_constant_var_def = 2 ;
97
106
98
107
#if __cplusplus > 201402L
99
- // / FIXME: Reject __device__ constexpr and inline variables in Sema.
100
- // LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
101
- // LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
108
+ // NORDC17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
109
+ // RDC17: @inline_var = linkonce_odr global i32 undef, comdat, align 4{{$}}
110
+ // NORDC17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
111
+ // RDC17: @_ZN1C17member_inline_varE = linkonce_odr constant i32 undef, comdat, align 4{{$}}
102
112
__device__ inline int inline_var = 3 ;
103
113
struct C {
104
114
__device__ static constexpr int member_inline_var = 4 ;
@@ -151,13 +161,13 @@ void use_pointers() {
151
161
// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
152
162
// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
153
163
// * constant unnamed string with NVModuleID
154
- // RDC : [[MODULE_ID_GLOBAL:@.*]] = private constant
164
+ // CUDARDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
155
165
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
156
166
// * Make sure our constructor was added to global ctor list.
157
167
// LNX: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
158
168
// * Alias to global symbol containing the NVModuleID.
159
- // RDC : @__fatbinwrap[[MODULE_ID]] ={{.*}} alias { i32, i32, i8*, i8* }
160
- // RDC -SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
169
+ // CUDARDC: @__fatbinwrap[[MODULE_ID]] ={{.*}} alias { i32, i32, i8*, i8* }
170
+ // CUDARDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
161
171
162
172
// Test that we build the correct number of calls to cudaSetupArgument followed
163
173
// by a call to cudaLaunch.
@@ -214,25 +224,33 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
214
224
// HIP-NEXT: icmp eq i8** {{.*}}, null
215
225
// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
216
226
// HIP: if:
217
- // NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
227
+ // CUDANORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
228
+ // .. stores return value in __[[PREFIX]]_gpubin_handle
229
+ // CUDANORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
230
+ // .. and then calls __[[PREFIX]]_register_globals
231
+ // HIP: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
218
232
// .. stores return value in __[[PREFIX]]_gpubin_handle
219
- // NORDC -NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
233
+ // HIP-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
220
234
// .. and then calls __[[PREFIX]]_register_globals
221
235
// HIP-NEXT: br label %exit
222
236
// HIP: exit:
223
237
// HIP-NEXT: load i8**, i8*** @__hip_gpubin_handle
224
- // NORDC-NEXT: call void @__[[PREFIX]]_register_globals
238
+ // CUDANORDC-NEXT: call void @__[[PREFIX]]_register_globals
239
+ // HIP-NEXT: call void @__[[PREFIX]]_register_globals
225
240
// * In separate mode we also register a destructor.
226
- // NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
241
+ // CUDANORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
242
+ // HIP-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
227
243
228
244
// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
229
- // RDC : call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
230
- // RDC -SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
231
- // RDC -SAME: [[MODULE_ID_GLOBAL]]
245
+ // CUDARDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
246
+ // CUDARDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
247
+ // CUDARDC-SAME: [[MODULE_ID_GLOBAL]]
232
248
233
249
// Test that we've created destructor.
234
- // NORDC: define internal void @__[[PREFIX]]_module_dtor
235
- // NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
250
+ // CUDANORDC: define internal void @__[[PREFIX]]_module_dtor
251
+ // HIP: define internal void @__[[PREFIX]]_module_dtor
252
+ // CUDANORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
253
+ // HIP: load{{.*}}__[[PREFIX]]_gpubin_handle
236
254
// CUDANORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
237
255
// HIP-NEXT: icmp ne i8** {{.*}}, null
238
256
// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
0 commit comments