@@ -2075,6 +2075,15 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2075
2075
if (const auto *C = D.getSingleClause <OMPOrderClause>())
2076
2076
if (C->getKind () == OMPC_ORDER_concurrent)
2077
2077
LoopStack.setParallel (/* Enable=*/ true );
2078
+ if ((D.getDirectiveKind () == OMPD_simd ||
2079
+ (getLangOpts ().OpenMPSimd &&
2080
+ isOpenMPSimdDirective (D.getDirectiveKind ()))) &&
2081
+ llvm::any_of (D.getClausesOfKind <OMPReductionClause>(),
2082
+ [](const OMPReductionClause *C) {
2083
+ return C->getModifier () == OMPC_REDUCTION_inscan;
2084
+ }))
2085
+ // Disable parallel access in case of prefix sum.
2086
+ LoopStack.setParallel (/* Enable=*/ false );
2078
2087
}
2079
2088
2080
2089
void CodeGenFunction::EmitOMPSimdFinal (
@@ -2270,6 +2279,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2270
2279
}
2271
2280
2272
2281
void CodeGenFunction::EmitOMPSimdDirective (const OMPSimdDirective &S) {
2282
+ ParentLoopDirectiveForScanRegion ScanRegion (*this , S);
2283
+ OMPFirstScanLoop = true ;
2273
2284
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2274
2285
emitOMPSimdRegion (CGF, S, Action);
2275
2286
};
@@ -4191,14 +4202,15 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4191
4202
}
4192
4203
4193
4204
void CodeGenFunction::EmitOMPScanDirective (const OMPScanDirective &S) {
4194
- // Do not emit code for non-simd directives in simd-only mode.
4195
- if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
4205
+ if (!OMPParentLoopDirectiveForScan)
4196
4206
return ;
4197
4207
const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4208
+ bool IsInclusive = S.hasClausesOfKind <OMPInclusiveClause>();
4198
4209
SmallVector<const Expr *, 4 > Shareds;
4199
4210
SmallVector<const Expr *, 4 > Privates;
4200
4211
SmallVector<const Expr *, 4 > LHSs;
4201
4212
SmallVector<const Expr *, 4 > RHSs;
4213
+ SmallVector<const Expr *, 4 > ReductionOps;
4202
4214
SmallVector<const Expr *, 4 > CopyOps;
4203
4215
SmallVector<const Expr *, 4 > CopyArrayTemps;
4204
4216
SmallVector<const Expr *, 4 > CopyArrayElems;
@@ -4209,13 +4221,109 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4209
4221
Privates.append (C->privates ().begin (), C->privates ().end ());
4210
4222
LHSs.append (C->lhs_exprs ().begin (), C->lhs_exprs ().end ());
4211
4223
RHSs.append (C->rhs_exprs ().begin (), C->rhs_exprs ().end ());
4224
+ ReductionOps.append (C->reduction_ops ().begin (), C->reduction_ops ().end ());
4212
4225
CopyOps.append (C->copy_ops ().begin (), C->copy_ops ().end ());
4213
4226
CopyArrayTemps.append (C->copy_array_temps ().begin (),
4214
4227
C->copy_array_temps ().end ());
4215
4228
CopyArrayElems.append (C->copy_array_elems ().begin (),
4216
4229
C->copy_array_elems ().end ());
4217
4230
}
4218
- bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4231
+ if (ParentDir.getDirectiveKind () == OMPD_simd ||
4232
+ (getLangOpts ().OpenMPSimd &&
4233
+ isOpenMPSimdDirective (ParentDir.getDirectiveKind ()))) {
4234
+ // For simd directive and simd-based directives in simd only mode, use the
4235
+ // following codegen:
4236
+ // int x = 0;
4237
+ // #pragma omp simd reduction(inscan, +: x)
4238
+ // for (..) {
4239
+ // <first part>
4240
+ // #pragma omp scan inclusive(x)
4241
+ // <second part>
4242
+ // }
4243
+ // is transformed to:
4244
+ // int x = 0;
4245
+ // for (..) {
4246
+ // int x_priv = 0;
4247
+ // <first part>
4248
+ // x = x_priv + x;
4249
+ // x_priv = x;
4250
+ // <second part>
4251
+ // }
4252
+ // and
4253
+ // int x = 0;
4254
+ // #pragma omp simd reduction(inscan, +: x)
4255
+ // for (..) {
4256
+ // <first part>
4257
+ // #pragma omp scan exclusive(x)
4258
+ // <second part>
4259
+ // }
4260
+ // to
4261
+ // int x = 0;
4262
+ // for (..) {
4263
+ // int x_priv = 0;
4264
+ // <second part>
4265
+ // int temp = x;
4266
+ // x = x_priv + x;
4267
+ // x_priv = temp;
4268
+ // <first part>
4269
+ // }
4270
+ llvm::BasicBlock *OMPScanReduce = createBasicBlock (" omp.inscan.reduce" );
4271
+ EmitBranch (IsInclusive
4272
+ ? OMPScanReduce
4273
+ : BreakContinueStack.back ().ContinueBlock .getBlock ());
4274
+ EmitBlock (OMPScanDispatch);
4275
+ {
4276
+ // New scope for correct construction/destruction of temp variables for
4277
+ // exclusive scan.
4278
+ LexicalScope Scope (*this , S.getSourceRange ());
4279
+ EmitBranch (IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4280
+ EmitBlock (OMPScanReduce);
4281
+ if (!IsInclusive) {
4282
+ // Create temp var and copy LHS value to this temp value.
4283
+ // TMP = LHS;
4284
+ for (unsigned I = 0 , E = CopyArrayElems.size (); I < E; ++I) {
4285
+ const Expr *PrivateExpr = Privates[I];
4286
+ const Expr *TempExpr = CopyArrayTemps[I];
4287
+ EmitAutoVarDecl (
4288
+ *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl ()));
4289
+ LValue DestLVal = EmitLValue (TempExpr);
4290
+ LValue SrcLVal = EmitLValue (LHSs[I]);
4291
+ EmitOMPCopy (PrivateExpr->getType (), DestLVal.getAddress (*this ),
4292
+ SrcLVal.getAddress (*this ),
4293
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl ()),
4294
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl ()),
4295
+ CopyOps[I]);
4296
+ }
4297
+ }
4298
+ CGM.getOpenMPRuntime ().emitReduction (
4299
+ *this , ParentDir.getEndLoc (), Privates, LHSs, RHSs, ReductionOps,
4300
+ {/* WithNowait=*/ true , /* SimpleReduction=*/ true , OMPD_simd});
4301
+ for (unsigned I = 0 , E = CopyArrayElems.size (); I < E; ++I) {
4302
+ const Expr *PrivateExpr = Privates[I];
4303
+ LValue DestLVal;
4304
+ LValue SrcLVal;
4305
+ if (IsInclusive) {
4306
+ DestLVal = EmitLValue (RHSs[I]);
4307
+ SrcLVal = EmitLValue (LHSs[I]);
4308
+ } else {
4309
+ const Expr *TempExpr = CopyArrayTemps[I];
4310
+ DestLVal = EmitLValue (RHSs[I]);
4311
+ SrcLVal = EmitLValue (TempExpr);
4312
+ }
4313
+ EmitOMPCopy (PrivateExpr->getType (), DestLVal.getAddress (*this ),
4314
+ SrcLVal.getAddress (*this ),
4315
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl ()),
4316
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl ()),
4317
+ CopyOps[I]);
4318
+ }
4319
+ }
4320
+ EmitBranch (IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4321
+ OMPScanExitBlock = IsInclusive
4322
+ ? BreakContinueStack.back ().ContinueBlock .getBlock ()
4323
+ : OMPScanReduce;
4324
+ EmitBlock (OMPAfterScanBlock);
4325
+ return ;
4326
+ }
4219
4327
if (!IsInclusive) {
4220
4328
EmitBranch (BreakContinueStack.back ().ContinueBlock .getBlock ());
4221
4329
EmitBlock (OMPScanExitBlock);
@@ -6313,6 +6421,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
6313
6421
}
6314
6422
if (isOpenMPSimdDirective (D.getDirectiveKind ())) {
6315
6423
(void )GlobalsScope.Privatize ();
6424
+ ParentLoopDirectiveForScanRegion ScanRegion (CGF, D);
6316
6425
emitOMPSimdRegion (CGF, cast<OMPLoopDirective>(D), Action);
6317
6426
} else {
6318
6427
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
0 commit comments