Skip to content

Commit d1419c9

Browse files
committed
[OPENMP]Fix PR47621: Variable used by task inside a template function is not made firstprivate by default
The check that determines whether a variable is declared inside the inner OpenMP region needs to be fixed so that such a variable can be made firstprivate. Differential Revision: https://reviews.llvm.org/D88240
1 parent 81a4088 commit d1419c9

File tree

3 files changed

+64
-10
lines changed

3 files changed

+64
-10
lines changed

clang/lib/Sema/SemaOpenMP.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ class DSAStackTy {
159159
OpenMPDirectiveKind Directive = OMPD_unknown;
160160
DeclarationNameInfo DirectiveName;
161161
Scope *CurScope = nullptr;
162+
DeclContext *Context = nullptr;
162163
SourceLocation ConstructLoc;
163164
/// Set of 'depend' clauses with 'sink|source' dependence kind. Required to
164165
/// get the data (loop counters etc.) about enclosing loop-based construct.
@@ -918,6 +919,7 @@ class DSAStackTy {
918919
const SharingMapTy *Top = getTopOfStackOrNull();
919920
return Top ? Top->CurScope : nullptr;
920921
}
922+
/// Stores \p DC in the Context field of the entry at the top of the stack.
void setContext(DeclContext *DC) { getTopOfStack().Context = DC; }
921923
SourceLocation getConstructLoc() const {
922924
const SharingMapTy *Top = getTopOfStackOrNull();
923925
return Top ? Top->ConstructLoc : SourceLocation();
@@ -1531,11 +1533,17 @@ bool DSAStackTy::isOpenMPLocal(VarDecl *D, const_iterator I) const {
15311533
for (const_iterator E = end(); I != E; ++I) {
15321534
if (isImplicitOrExplicitTaskingRegion(I->Directive) ||
15331535
isOpenMPTargetExecutionDirective(I->Directive)) {
1534-
Scope *TopScope = I->CurScope ? I->CurScope->getParent() : nullptr;
1535-
Scope *CurScope = getCurScope();
1536-
while (CurScope && CurScope != TopScope && !CurScope->isDeclScope(D))
1537-
CurScope = CurScope->getParent();
1538-
return CurScope != TopScope;
1536+
if (I->CurScope) {
1537+
Scope *TopScope = I->CurScope->getParent();
1538+
Scope *CurScope = getCurScope();
1539+
while (CurScope && CurScope != TopScope && !CurScope->isDeclScope(D))
1540+
CurScope = CurScope->getParent();
1541+
return CurScope != TopScope;
1542+
}
1543+
for (DeclContext *DC = D->getDeclContext(); DC; DC = DC->getParent())
1544+
if (I->Context == DC)
1545+
return true;
1546+
return false;
15391547
}
15401548
}
15411549
return false;
@@ -4148,6 +4156,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
41484156
default:
41494157
llvm_unreachable("Unknown OpenMP directive");
41504158
}
4159+
DSAStack->setContext(CurContext);
41514160
}
41524161

41534162
int Sema::getNumberOfConstructScopes(unsigned Level) const {

clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
2-
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
3-
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
4-
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
5-
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
1+
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
2+
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
3+
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
4+
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
5+
// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
66
// expected-no-diagnostics
77
#ifndef HEADER
88
#define HEADER

clang/test/OpenMP/task_codegen.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,4 +389,49 @@ struct S1 {
389389
// CHECK-LABEL: taskinit
390390
// CHECK: call i8* @__kmpc_omp_task_alloc(
391391

392+
template <typename T = void>
393+
void foobar() {
394+
float a;
395+
#pragma omp parallel
396+
#pragma omp single
397+
{
398+
double b;
399+
#pragma omp task
400+
a += b;
401+
}
402+
}
403+
404+
// CHECK: define void @{{.+}}xxxx{{.+}}()
405+
void xxxx() {
406+
// CHECK: call void @{{.+}}foobar{{.+}}()
407+
foobar();
408+
}
409+
// CHECK: define {{.*}}void @{{.+}}foobar{{.+}}()
410+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* [[PAR_OUTLINED:@.+]] to void (i32*, i32*, ...)*), float* %{{.+}})
411+
412+
// CHECK: define internal void [[PAR_OUTLINED]](i32* {{.+}}, i32* {{.+}}, float* {{.*}}[[A_ADDR:%.+]])
413+
// UNTIEDRT: [[A_ADDR_REF:%.+]] = alloca float*,
414+
// CHECK: [[B_ADDR:%.+]] = alloca double,
415+
// UNTIEDRT: [[A_ADDR:%.+]] = load float*, float** [[A_ADDR_REF]],
416+
417+
// Copy `a` to the list of shared variables
418+
// CHECK: [[SHARED_A:%.+]] = getelementptr inbounds %{{.+}}, [[SHAREDS_TY:%.+]]* [[SHAREDS:%.+]], i32 0, i32 0
419+
// CHECK: store float* [[A_ADDR]], float** [[SHARED_A]],
420+
421+
// Allocate task.
422+
// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[T_TASK_TY:%.+]]*)* @{{.+}} to i32 (i32, i8*)*))
423+
// CHECK: [[TD:%.+]] = bitcast i8* [[RES]] to [[T_TASK_TY]]*
424+
// Copy shared vars.
425+
// CHECK: [[TD_TASK:%.+]] = getelementptr inbounds [[T_TASK_TY]], [[T_TASK_TY]]* [[TD]], i32 0, i32 0
426+
// CHECK: [[TD_TASK_SHARES_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[TD_TASK]], i32 0, i32 0
427+
// CHECK: [[TD_TASK_SHARES:%.+]] = load i8*, i8** [[TD_TASK_SHARES_REF]],
428+
// CHECK: [[SHAREDS_BC:%.+]] = bitcast [[SHAREDS_TY]]* [[SHAREDS]] to i8*
429+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TD_TASK_SHARES]], i8* align 8 [[SHAREDS_BC]], i64 8, i1 false)
430+
431+
// Copy firstprivate value of `b`.
432+
// CHECK: [[TD_TASK_PRIVS:%.+]] = getelementptr inbounds [[T_TASK_TY]], [[T_TASK_TY]]* [[TD]], i32 0, i32 1
433+
// CHECK: [[TD_TASK_PRIVS_B:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[TD_TASK_PRIVS]], i32 0, i32 0
434+
// CHECK: [[B_VAL:%.+]] = load double, double* [[B_ADDR]],
435+
// CHECK: store double [[B_VAL]], double* [[TD_TASK_PRIVS_B]],
436+
392437
#endif

0 commit comments

Comments
 (0)