Skip to content

Commit fb80e67

Browse files
committed
[OPENMP50]Codegen for scan directive in simd loops.
Added codegen for scandirectives in simd loop. The codegen transforms original code: ``` int x = 0; #pragma omp simd reduction(inscan, +: x) for (..) { <first part> #pragma omp scan inclusive(x) <second part> } ``` into ``` int x = 0; for (..) { int x_priv = 0; <first part> x = x_priv + x; x_priv = x; <second part> } ``` and ``` int x = 0; #pragma omp simd reduction(inscan, +: x) for (..) { <first part> #pragma omp scan exclusive(x) <second part> } ``` into ``` int x = 0; for (..) { int x_priv = 0; <second part> int temp = x; x = x_priv + x; x_priv = temp; <first part> } ``` Differential revision: https://reviews.llvm.org/D78232
1 parent 948b206 commit fb80e67

File tree

3 files changed

+420
-21
lines changed

3 files changed

+420
-21
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

+112-3
Original file line numberDiff line numberDiff line change
@@ -2075,6 +2075,15 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
20752075
if (const auto *C = D.getSingleClause<OMPOrderClause>())
20762076
if (C->getKind() == OMPC_ORDER_concurrent)
20772077
LoopStack.setParallel(/*Enable=*/true);
2078+
if ((D.getDirectiveKind() == OMPD_simd ||
2079+
(getLangOpts().OpenMPSimd &&
2080+
isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2081+
llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2082+
[](const OMPReductionClause *C) {
2083+
return C->getModifier() == OMPC_REDUCTION_inscan;
2084+
}))
2085+
// Disable parallel access in case of prefix sum.
2086+
LoopStack.setParallel(/*Enable=*/false);
20782087
}
20792088

20802089
void CodeGenFunction::EmitOMPSimdFinal(
@@ -2270,6 +2279,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
22702279
}
22712280

22722281
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2282+
ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2283+
OMPFirstScanLoop = true;
22732284
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
22742285
emitOMPSimdRegion(CGF, S, Action);
22752286
};
@@ -4191,14 +4202,15 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
41914202
}
41924203

41934204
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4194-
// Do not emit code for non-simd directives in simd-only mode.
4195-
if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
4205+
if (!OMPParentLoopDirectiveForScan)
41964206
return;
41974207
const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4208+
bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
41984209
SmallVector<const Expr *, 4> Shareds;
41994210
SmallVector<const Expr *, 4> Privates;
42004211
SmallVector<const Expr *, 4> LHSs;
42014212
SmallVector<const Expr *, 4> RHSs;
4213+
SmallVector<const Expr *, 4> ReductionOps;
42024214
SmallVector<const Expr *, 4> CopyOps;
42034215
SmallVector<const Expr *, 4> CopyArrayTemps;
42044216
SmallVector<const Expr *, 4> CopyArrayElems;
@@ -4209,13 +4221,109 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
42094221
Privates.append(C->privates().begin(), C->privates().end());
42104222
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
42114223
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4224+
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
42124225
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
42134226
CopyArrayTemps.append(C->copy_array_temps().begin(),
42144227
C->copy_array_temps().end());
42154228
CopyArrayElems.append(C->copy_array_elems().begin(),
42164229
C->copy_array_elems().end());
42174230
}
4218-
bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4231+
if (ParentDir.getDirectiveKind() == OMPD_simd ||
4232+
(getLangOpts().OpenMPSimd &&
4233+
isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4234+
// For simd directive and simd-based directives in simd only mode, use the
4235+
// following codegen:
4236+
// int x = 0;
4237+
// #pragma omp simd reduction(inscan, +: x)
4238+
// for (..) {
4239+
// <first part>
4240+
// #pragma omp scan inclusive(x)
4241+
// <second part>
4242+
// }
4243+
// is transformed to:
4244+
// int x = 0;
4245+
// for (..) {
4246+
// int x_priv = 0;
4247+
// <first part>
4248+
// x = x_priv + x;
4249+
// x_priv = x;
4250+
// <second part>
4251+
// }
4252+
// and
4253+
// int x = 0;
4254+
// #pragma omp simd reduction(inscan, +: x)
4255+
// for (..) {
4256+
// <first part>
4257+
// #pragma omp scan exclusive(x)
4258+
// <second part>
4259+
// }
4260+
// to
4261+
// int x = 0;
4262+
// for (..) {
4263+
// int x_priv = 0;
4264+
// <second part>
4265+
// int temp = x;
4266+
// x = x_priv + x;
4267+
// x_priv = temp;
4268+
// <first part>
4269+
// }
4270+
llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4271+
EmitBranch(IsInclusive
4272+
? OMPScanReduce
4273+
: BreakContinueStack.back().ContinueBlock.getBlock());
4274+
EmitBlock(OMPScanDispatch);
4275+
{
4276+
// New scope for correct construction/destruction of temp variables for
4277+
// exclusive scan.
4278+
LexicalScope Scope(*this, S.getSourceRange());
4279+
EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4280+
EmitBlock(OMPScanReduce);
4281+
if (!IsInclusive) {
4282+
// Create temp var and copy LHS value to this temp value.
4283+
// TMP = LHS;
4284+
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4285+
const Expr *PrivateExpr = Privates[I];
4286+
const Expr *TempExpr = CopyArrayTemps[I];
4287+
EmitAutoVarDecl(
4288+
*cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4289+
LValue DestLVal = EmitLValue(TempExpr);
4290+
LValue SrcLVal = EmitLValue(LHSs[I]);
4291+
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4292+
SrcLVal.getAddress(*this),
4293+
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4294+
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4295+
CopyOps[I]);
4296+
}
4297+
}
4298+
CGM.getOpenMPRuntime().emitReduction(
4299+
*this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4300+
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4301+
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4302+
const Expr *PrivateExpr = Privates[I];
4303+
LValue DestLVal;
4304+
LValue SrcLVal;
4305+
if (IsInclusive) {
4306+
DestLVal = EmitLValue(RHSs[I]);
4307+
SrcLVal = EmitLValue(LHSs[I]);
4308+
} else {
4309+
const Expr *TempExpr = CopyArrayTemps[I];
4310+
DestLVal = EmitLValue(RHSs[I]);
4311+
SrcLVal = EmitLValue(TempExpr);
4312+
}
4313+
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4314+
SrcLVal.getAddress(*this),
4315+
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4316+
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4317+
CopyOps[I]);
4318+
}
4319+
}
4320+
EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4321+
OMPScanExitBlock = IsInclusive
4322+
? BreakContinueStack.back().ContinueBlock.getBlock()
4323+
: OMPScanReduce;
4324+
EmitBlock(OMPAfterScanBlock);
4325+
return;
4326+
}
42194327
if (!IsInclusive) {
42204328
EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
42214329
EmitBlock(OMPScanExitBlock);
@@ -6313,6 +6421,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
63136421
}
63146422
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
63156423
(void)GlobalsScope.Privatize();
6424+
ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
63166425
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
63176426
} else {
63186427
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {

clang/lib/Sema/SemaOpenMP.cpp

+31-18
Original file line numberDiff line numberDiff line change
@@ -15150,24 +15150,37 @@ static bool actOnOMPReductionKindClause(
1515015150
S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true);
1515115151
if (!CopyOpRes.isUsable())
1515215152
continue;
15153-
// Build temp array for prefix sum.
15154-
auto *Dim = new (S.Context)
15155-
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
15156-
QualType ArrayTy =
15157-
S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
15158-
/*IndexTypeQuals=*/0, {ELoc, ELoc});
15159-
VarDecl *TempArrayVD =
15160-
buildVarDecl(S, ELoc, ArrayTy, D->getName(),
15161-
D->hasAttrs() ? &D->getAttrs() : nullptr);
15162-
// Add a constructor to the temp decl.
15163-
S.ActOnUninitializedDecl(TempArrayVD);
15164-
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
15165-
TempArrayElem =
15166-
S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
15167-
auto *Idx = new (S.Context)
15168-
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
15169-
TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
15170-
ELoc, Idx, ELoc);
15153+
// For simd directive and simd-based directives in simd mode no need to
15154+
// construct temp array, need just a single temp element.
15155+
if (Stack->getCurrentDirective() == OMPD_simd ||
15156+
(S.getLangOpts().OpenMPSimd &&
15157+
isOpenMPSimdDirective(Stack->getCurrentDirective()))) {
15158+
VarDecl *TempArrayVD =
15159+
buildVarDecl(S, ELoc, PrivateTy, D->getName(),
15160+
D->hasAttrs() ? &D->getAttrs() : nullptr);
15161+
// Add a constructor to the temp decl.
15162+
S.ActOnUninitializedDecl(TempArrayVD);
15163+
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, PrivateTy, ELoc);
15164+
} else {
15165+
// Build temp array for prefix sum.
15166+
auto *Dim = new (S.Context)
15167+
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
15168+
QualType ArrayTy =
15169+
S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
15170+
/*IndexTypeQuals=*/0, {ELoc, ELoc});
15171+
VarDecl *TempArrayVD =
15172+
buildVarDecl(S, ELoc, ArrayTy, D->getName(),
15173+
D->hasAttrs() ? &D->getAttrs() : nullptr);
15174+
// Add a constructor to the temp decl.
15175+
S.ActOnUninitializedDecl(TempArrayVD);
15176+
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
15177+
TempArrayElem =
15178+
S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
15179+
auto *Idx = new (S.Context)
15180+
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
15181+
TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
15182+
ELoc, Idx, ELoc);
15183+
}
1517115184
}
1517215185

1517315186
// OpenMP [2.15.4.6, Restrictions, p.2]

0 commit comments

Comments
 (0)