Skip to content

Commit b008ea3

Browse files
committed
[CUDA][HIP] Fix device variable linkage
For -fgpu-rdc, shadow variables should not be internalized; otherwise they cannot be accessed by other TUs. This is necessary because the shadow variables of external device variables are always emitted as undefined symbols, which need to resolve to global symbols. Managed variables need to be emitted as undefined symbols in device compilations. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D95901
1 parent c981f6f commit b008ea3

File tree

6 files changed

+166
-44
lines changed

6 files changed

+166
-44
lines changed

clang/lib/AST/ASTContext.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11435,16 +11435,22 @@ operator<<(const StreamingDiagnostic &DB,
1143511435
}
1143611436

1143711437
bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
11438-
return !getLangOpts().GPURelocatableDeviceCode &&
11439-
((D->hasAttr<CUDADeviceAttr>() &&
11440-
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
11441-
(D->hasAttr<CUDAConstantAttr>() &&
11442-
!D->getAttr<CUDAConstantAttr>()->isImplicit())) &&
11443-
isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
11444-
cast<VarDecl>(D)->getStorageClass() == SC_Static;
11438+
bool IsStaticVar =
11439+
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
11440+
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
11441+
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
11442+
(D->hasAttr<CUDAConstantAttr>() &&
11443+
!D->getAttr<CUDAConstantAttr>()->isImplicit());
11444+
// CUDA/HIP: static managed variables need to be externalized since they are
11445+
// declarations in IR and therefore cannot have internal linkage.
11446+
// ToDo: externalize static variables for -fgpu-rdc.
11447+
return IsStaticVar &&
11448+
(D->hasAttr<HIPManagedAttr>() ||
11449+
(!getLangOpts().GPURelocatableDeviceCode && IsExplicitDeviceVar));
1144511450
}
1144611451

1144711452
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
1144811453
return mayExternalizeStaticVar(D) &&
11449-
CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D));
11454+
(D->hasAttr<HIPManagedAttr>() ||
11455+
CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)));
1145011456
}

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
546546
/*Init=*/llvm::ConstantPointerNull::get(Var->getType()),
547547
Twine(Var->getName() + ".managed"), /*InsertBefore=*/nullptr,
548548
llvm::GlobalVariable::NotThreadLocal);
549+
ManagedVar->setDSOLocal(Var->isDSOLocal());
550+
ManagedVar->setVisibility(Var->getVisibility());
549551
replaceManagedVar(Var, ManagedVar);
550552
llvm::Value *Args[] = {
551553
&GpuBinaryHandlePtr,
@@ -932,11 +934,16 @@ CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
932934

933935
void CGNVCUDARuntime::internalizeDeviceSideVar(
934936
const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
935-
// Host-side shadows of external declarations of device-side
936-
// global variables become internal definitions. These have to
937-
// be internal in order to prevent name conflicts with global
938-
// host variables with the same name in a different TUs.
937+
// For -fno-gpu-rdc, host-side shadows of external declarations of device-side
938+
// global variables become internal definitions. These have to be internal in
939+
// order to prevent name conflicts with global host variables with the same
940+
// name in different TUs.
939941
//
942+
// For -fgpu-rdc, the shadow variables should not be internalized because
943+
// they may be accessed by different TUs.
944+
if (CGM.getLangOpts().GPURelocatableDeviceCode)
945+
return;
946+
940947
// __shared__ variables are odd. Shadows do get created, but
941948
// they are not registered with the CUDA runtime, so they
942949
// can't really be used to access their device-side

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4169,8 +4169,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
41694169
bool NeedsGlobalDtor =
41704170
D->needsDestruction(getContext()) == QualType::DK_cxx_destructor;
41714171

4172+
bool IsHIPManagedVarOnDevice =
4173+
getLangOpts().CUDAIsDevice && D->hasAttr<HIPManagedAttr>();
4174+
41724175
const VarDecl *InitDecl;
4173-
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
4176+
const Expr *InitExpr =
4177+
IsHIPManagedVarOnDevice ? nullptr : D->getAnyInitializer(InitDecl);
41744178

41754179
Optional<ConstantEmitter> emitter;
41764180

@@ -4190,8 +4194,6 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
41904194
(D->getType()->isCUDADeviceBuiltinSurfaceType() ||
41914195
D->getType()->isCUDADeviceBuiltinTextureType() ||
41924196
D->hasAttr<HIPManagedAttr>());
4193-
// HIP pinned shadow of initialized host-side global variables are also
4194-
// left undefined.
41954197
if (getLangOpts().CUDA &&
41964198
(IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar))
41974199
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
@@ -4302,7 +4304,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
43024304
}
43034305
}
43044306

4305-
GV->setInitializer(Init);
4307+
// HIP managed variables need to be emitted as declarations in device
4308+
// compilation.
4309+
if (!IsHIPManagedVarOnDevice)
4310+
GV->setInitializer(Init);
43064311
if (emitter)
43074312
emitter->finalize(GV);
43084313

clang/test/CodeGenCUDA/device-stub.cu

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,13 @@
3030
// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
3131
// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW
3232
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
33+
// RUN: -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
34+
// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
35+
// RUN: --check-prefixes=ALL,LNX,NORDC,CUDA,CUDANORDC,CUDA-NEW,LNX_17,NORDC17
36+
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
3337
// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
3438
// RUN: | FileCheck %s -allow-deprecated-dag-overlap \
35-
// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW,LNX_17
39+
// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA-NEW,LNX_17,RDC17
3640
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
3741
// RUN: -target-sdk-version=9.2 -o - \
3842
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
@@ -45,7 +49,7 @@
4549
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
4650
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
4751
// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
48-
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,NORDC,HIP,HIPEF
52+
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,LNX,RDC,HIP,HIPEF
4953
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
5054
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,LNX,NORDC,HIP,HIPNEF
5155

@@ -56,15 +60,18 @@
5660
#include "Inputs/cuda.h"
5761

5862
#ifndef NOGLOBALS
59-
// LNX-DAG: @device_var = internal global i32
63+
// NORDC-DAG: @device_var = internal global i32
64+
// RDC-DAG: @device_var = dso_local global i32
6065
// WIN-DAG: @"?device_var@@3HA" = internal global i32
6166
__device__ int device_var;
6267

63-
// LNX-DAG: @constant_var = internal global i32
68+
// NORDC-DAG: @constant_var = internal global i32
69+
// RDC-DAG: @constant_var = dso_local global i32
6470
// WIN-DAG: @"?constant_var@@3HA" = internal global i32
6571
__constant__ int constant_var;
6672

67-
// LNX-DAG: @shared_var = internal global i32
73+
// NORDC-DAG: @shared_var = internal global i32
74+
// RDC-DAG: @shared_var = dso_local global i32
6875
// WIN-DAG: @"?shared_var@@3HA" = internal global i32
6976
__shared__ int shared_var;
7077

@@ -87,18 +94,21 @@ extern __constant__ int ext_constant_var;
8794

8895
// external device-side variables with definitions should generate
8996
// definitions for the shadows.
90-
// LNX-DAG: @ext_device_var_def = internal global i32 undef,
97+
// NORDC-DAG: @ext_device_var_def = internal global i32 undef,
98+
// RDC-DAG: @ext_device_var_def = dso_local global i32 undef,
9199
// WIN-DAG: @"?ext_device_var_def@@3HA" = internal global i32 undef
92100
extern __device__ int ext_device_var_def;
93101
__device__ int ext_device_var_def = 1;
94-
// LNX-DAG: @ext_device_var_def = internal global i32 undef,
102+
// NORDC-DAG: @ext_device_var_def = internal global i32 undef,
103+
// RDC-DAG: @ext_device_var_def = dso_local global i32 undef,
95104
// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
96105
__constant__ int ext_constant_var_def = 2;
97106

98107
#if __cplusplus > 201402L
99-
/// FIXME: Reject __device__ constexpr and inline variables in Sema.
100-
// LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
101-
// LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
108+
// NORDC17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
109+
// RDC17: @inline_var = linkonce_odr global i32 undef, comdat, align 4{{$}}
110+
// NORDC17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
111+
// RDC17: @_ZN1C17member_inline_varE = linkonce_odr constant i32 undef, comdat, align 4{{$}}
102112
__device__ inline int inline_var = 3;
103113
struct C {
104114
__device__ static constexpr int member_inline_var = 4;
@@ -151,13 +161,13 @@ void use_pointers() {
151161
// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
152162
// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
153163
// * constant unnamed string with NVModuleID
154-
// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
164+
// CUDARDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
155165
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
156166
// * Make sure our constructor was added to global ctor list.
157167
// LNX: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
158168
// * Alias to global symbol containing the NVModuleID.
159-
// RDC: @__fatbinwrap[[MODULE_ID]] ={{.*}} alias { i32, i32, i8*, i8* }
160-
// RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
169+
// CUDARDC: @__fatbinwrap[[MODULE_ID]] ={{.*}} alias { i32, i32, i8*, i8* }
170+
// CUDARDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
161171

162172
// Test that we build the correct number of calls to cudaSetupArgument followed
163173
// by a call to cudaLaunch.
@@ -214,25 +224,33 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
214224
// HIP-NEXT: icmp eq i8** {{.*}}, null
215225
// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
216226
// HIP: if:
217-
// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
227+
// CUDANORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
228+
// .. stores return value in __[[PREFIX]]_gpubin_handle
229+
// CUDANORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
230+
// .. and then calls __[[PREFIX]]_register_globals
231+
// HIP: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
218232
// .. stores return value in __[[PREFIX]]_gpubin_handle
219-
// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
233+
// HIP-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
220234
// .. and then calls __[[PREFIX]]_register_globals
221235
// HIP-NEXT: br label %exit
222236
// HIP: exit:
223237
// HIP-NEXT: load i8**, i8*** @__hip_gpubin_handle
224-
// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
238+
// CUDANORDC-NEXT: call void @__[[PREFIX]]_register_globals
239+
// HIP-NEXT: call void @__[[PREFIX]]_register_globals
225240
// * In separate mode we also register a destructor.
226-
// NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
241+
// CUDANORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
242+
// HIP-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
227243

228244
// With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
229-
// RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
230-
// RDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
231-
// RDC-SAME: [[MODULE_ID_GLOBAL]]
245+
// CUDARDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](
246+
// CUDARDC-SAME: __[[PREFIX]]_register_globals, {{.*}}__[[PREFIX]]_fatbin_wrapper
247+
// CUDARDC-SAME: [[MODULE_ID_GLOBAL]]
232248

233249
// Test that we've created destructor.
234-
// NORDC: define internal void @__[[PREFIX]]_module_dtor
235-
// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
250+
// CUDANORDC: define internal void @__[[PREFIX]]_module_dtor
251+
// HIP: define internal void @__[[PREFIX]]_module_dtor
252+
// CUDANORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
253+
// HIP: load{{.*}}__[[PREFIX]]_gpubin_handle
236254
// CUDANORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
237255
// HIP-NEXT: icmp ne i8** {{.*}}, null
238256
// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
2+
// RUN: -emit-llvm -o - -x hip %s \
3+
// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
4+
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
5+
// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \
6+
// RUN: | FileCheck -check-prefixes=DEV,RDC %s
7+
// RUN: %clang_cc1 -triple nvptx \
8+
// RUN: -emit-llvm -o - -x hip %s \
9+
// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
10+
// RUN: %clang_cc1 -triple nvptx \
11+
// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \
12+
// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
13+
14+
#include "Inputs/cuda.h"
15+
16+
// DEV-DAG: @v1 = dso_local addrspace(1) externally_initialized global i32 0
17+
// NORDC-H-DAG: @v1 = internal global i32 undef
18+
// RDC-H-DAG: @v1 = dso_local global i32 undef
19+
__device__ int v1;
20+
// DEV-DAG: @v2 = dso_local addrspace(4) externally_initialized global i32 0
21+
// NORDC-H-DAG: @v2 = internal global i32 undef
22+
// RDC-H-DAG: @v2 = dso_local global i32 undef
23+
__constant__ int v2;
24+
// DEV-DAG: @v3 = external addrspace(1) externally_initialized global i32
25+
// NORDC-H-DAG: @v3 = internal global i32 0
26+
// RDC-H-DAG: @v3 = dso_local global i32 0
27+
__managed__ int v3;
28+
29+
// DEV-DAG: @ev1 = external addrspace(1) global i32
30+
// HOST-DAG: @ev1 = external global i32
31+
extern __device__ int ev1;
32+
// DEV-DAG: @ev2 = external addrspace(4) global i32
33+
// HOST-DAG: @ev2 = external global i32
34+
extern __constant__ int ev2;
35+
// DEV-DAG: @ev3 = external addrspace(1) global i32
36+
// HOST-DAG: @ev3 = external global i32
37+
extern __managed__ int ev3;
38+
39+
// NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0
40+
// RDC-DAG: @_ZL3sv1 = internal addrspace(1) global i32 0
41+
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
42+
static __device__ int sv1;
43+
// NORDC-DAG: @_ZL3sv2 = dso_local addrspace(4) externally_initialized global i32 0
44+
// RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0
45+
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
46+
static __constant__ int sv2;
47+
// DEV-DAG: @_ZL3sv3 = external addrspace(1) externally_initialized global i32
48+
// HOST-DAG: @_ZL3sv3 = internal global i32 0
49+
static __managed__ int sv3;
50+
51+
__device__ __host__ int work(int *x);
52+
53+
__device__ __host__ int fun1() {
54+
return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3);
55+
}
56+
57+
// HOST: hipRegisterVar({{.*}}@v1
58+
// HOST: hipRegisterVar({{.*}}@v2
59+
// HOST: hipRegisterManagedVar({{.*}}@v3
60+
// HOST-NOT: hipRegisterVar({{.*}}@ev1
61+
// HOST-NOT: hipRegisterVar({{.*}}@ev2
62+
// HOST-NOT: hipRegisterManagedVar({{.*}}@ev3
63+
// HOST: hipRegisterVar({{.*}}@_ZL3sv1
64+
// HOST: hipRegisterVar({{.*}}@_ZL3sv2
65+
// HOST: hipRegisterManagedVar({{.*}}@_ZL3sv3

clang/test/CodeGenCUDA/managed-var.cu

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,19 @@
1010

1111
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
1212
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
13-
// RUN: -check-prefixes=HOST %s
13+
// RUN: -check-prefixes=HOST,NORDC %s
1414

1515
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
1616
// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
17-
// RUN: -check-prefixes=HOST %s
17+
// RUN: -check-prefixes=HOST,RDC %s
1818

1919
#include "Inputs/cuda.h"
2020

21-
// DEV-DAG: @x = {{.*}}addrspace(1) externally_initialized global i32 undef
22-
// HOST-DAG: @x = internal global i32 1
23-
// HOST-DAG: @x.managed = internal global i32* null
21+
// DEV-DAG: @x = external addrspace(1) externally_initialized global i32
22+
// NORDC-DAG: @x = internal global i32 1
23+
// RDC-DAG: @x = dso_local global i32 1
24+
// NORDC-DAG: @x.managed = internal global i32* null
25+
// RDC-DAG: @x.managed = dso_local global i32* null
2426
// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"x\00"
2527

2628
struct vec {
@@ -31,11 +33,28 @@ __managed__ int x = 1;
3133
__managed__ vec v[100];
3234
__managed__ vec v2[100] = {{1, 1, 1}};
3335

36+
// DEV-DAG: @ex = external addrspace(1) global i32
37+
// HOST-DAG: @ex = external global i32
38+
extern __managed__ int ex;
39+
40+
// DEV-DAG: @_ZL2sx = external addrspace(1) externally_initialized global i32
41+
// HOST-DAG: @_ZL2sx = internal global i32 1
42+
// HOST-DAG: @_ZL2sx.managed = internal global i32* null
43+
static __managed__ int sx = 1;
44+
45+
// HOST-NOT: @ex.managed
46+
47+
// Force ex and sx to be emitted in device compilation.
3448
__global__ void foo(int *z) {
35-
*z = x;
49+
*z = x + ex + sx;
3650
v[1].x = 2;
3751
}
3852

53+
// Force ex and sx to be emitted in host compilation.
54+
int foo2() {
55+
return ex + sx;
56+
}
57+
3958
// HOST-LABEL: define {{.*}}@_Z4loadv()
4059
// HOST: %ld.managed = load i32*, i32** @x.managed, align 4
4160
// HOST: %0 = load i32, i32* %ld.managed, align 4
@@ -97,4 +116,6 @@ float addr_taken2() {
97116
}
98117

99118
// HOST-DAG: __hipRegisterManagedVar({{.*}}@x.managed {{.*}}@x {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4)
119+
// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx.managed {{.*}}@_ZL2sx
120+
// HOST-NOT: __hipRegisterManagedVar({{.*}}@ex.managed {{.*}}@ex
100121
// HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32)

0 commit comments

Comments
 (0)