|
24 | 24 | #include "polly/Support/VirtualInstruction.h"
|
25 | 25 | #include "llvm/ADT/ArrayRef.h"
|
26 | 26 | #include "llvm/ADT/EquivalenceClasses.h"
|
| 27 | +#include "llvm/ADT/PostOrderIterator.h" |
27 | 28 | #include "llvm/ADT/Statistic.h"
|
28 | 29 | #include "llvm/Analysis/AliasAnalysis.h"
|
29 | 30 | #include "llvm/Analysis/Loads.h"
|
@@ -222,6 +223,179 @@ void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
|
222 | 223 | ensureValueRead(Op.get(), UserStmt);
|
223 | 224 | }
|
224 | 225 |
|
| 226 | +// Create a sequence of two schedules. Either argument may be null and is |
| 227 | +// interpreted as the empty schedule. Can also return null if both schedules are |
| 228 | +// empty. |
| 229 | +static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) { |
| 230 | + if (!Prev) |
| 231 | + return Succ; |
| 232 | + if (!Succ) |
| 233 | + return Prev; |
| 234 | + |
| 235 | + return Prev.sequence(Succ); |
| 236 | +} |
| 237 | + |
| 238 | +// Create an isl_multi_union_aff that defines an identity mapping from the |
| 239 | +// elements of USet to their N-th dimension. |
| 240 | +// |
| 241 | +// # Example: |
| 242 | +// |
| 243 | +// Domain: { A[i,j]; B[i,j,k] } |
| 244 | +// N: 2 |
| 245 | +// |
| 246 | +// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] } |
| 247 | +// |
| 248 | +// @param USet A union set describing the elements for which to generate a |
| 249 | +// mapping. |
| 250 | +// @param N The dimension to map to. |
| 251 | +// @returns A mapping from USet to its N-th dimension. |
| 252 | +static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) { |
| 253 | + assert(N >= 0); |
| 254 | + assert(USet); |
| 255 | + assert(!USet.is_empty()); |
| 256 | + |
| 257 | + auto Result = isl::union_pw_multi_aff::empty(USet.get_space()); |
| 258 | + |
| 259 | + for (isl::set S : USet.get_set_list()) { |
| 260 | + int Dim = S.dim(isl::dim::set); |
| 261 | + auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set, |
| 262 | + N, Dim - N); |
| 263 | + if (N > 1) |
| 264 | + PMA = PMA.drop_dims(isl::dim::out, 0, N - 1); |
| 265 | + |
| 266 | + Result = Result.add_pw_multi_aff(PMA); |
| 267 | + } |
| 268 | + |
| 269 | + return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result)); |
| 270 | +} |
| 271 | + |
| 272 | +void ScopBuilder::buildSchedule() { |
| 273 | + Loop *L = getLoopSurroundingScop(*scop, LI); |
| 274 | + LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)}); |
| 275 | + buildSchedule(scop->getRegion().getNode(), LoopStack); |
| 276 | + assert(LoopStack.size() == 1 && LoopStack.back().L == L); |
| 277 | + scop->setScheduleTree(LoopStack[0].Schedule); |
| 278 | +} |
| 279 | + |
| 280 | +/// To generate a schedule for the elements in a Region we traverse the Region |
| 281 | +/// in reverse-post-order and add the contained RegionNodes in traversal order |
| 282 | +/// to the schedule of the loop that is currently at the top of the LoopStack. |
| 283 | +/// For loop-free codes, this results in a correct sequential ordering. |
| 284 | +/// |
| 285 | +/// Example: |
| 286 | +/// bb1(0) |
| 287 | +/// / \. |
| 288 | +/// bb2(1) bb3(2) |
| 289 | +/// \ / \. |
| 290 | +/// bb4(3) bb5(4) |
| 291 | +/// \ / |
| 292 | +/// bb6(5) |
| 293 | +/// |
| 294 | +/// Including loops requires additional processing. Whenever a loop header is |
| 295 | +/// encountered, the corresponding loop is added to the @p LoopStack. Starting |
| 296 | +/// from an empty schedule, we first process all RegionNodes that are within |
| 297 | +/// this loop and complete the sequential schedule at this loop-level before |
| 298 | +/// processing any other nodes. To implement this |
| 299 | +/// loop-nodes-first-processing, the reverse post-order traversal is |
| 300 | +/// insufficient. Hence, we additionally check if the traversal yields |
| 301 | +/// sub-regions or blocks that are outside the last loop on the @p LoopStack. |
| 302 | +/// These region-nodes are then queued and only traversed after all nodes |
| 303 | +/// within the current loop have been processed. |
| 304 | +void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) { |
| 305 | + Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI); |
| 306 | + |
| 307 | + ReversePostOrderTraversal<Region *> RTraversal(R); |
| 308 | + std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end()); |
| 309 | + std::deque<RegionNode *> DelayList; |
| 310 | + bool LastRNWaiting = false; |
| 311 | + |
| 312 | + // Iterate over the region @p R in reverse post-order but queue |
| 313 | + // sub-regions/blocks iff they are not part of the last encountered but not |
| 314 | + // completely traversed loop. The variable LastRNWaiting is a flag to indicate |
| 315 | + // that we queued the last sub-region/block from the reverse post-order |
| 316 | + // iterator. If it is set we have to explore the next sub-region/block from |
| 317 | + // the iterator (if any) to guarantee progress. If it is not set we first try |
| 318 | + // the next queued sub-region/blocks. |
| 319 | + while (!WorkList.empty() || !DelayList.empty()) { |
| 320 | + RegionNode *RN; |
| 321 | + |
| 322 | + if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) { |
| 323 | + RN = WorkList.front(); |
| 324 | + WorkList.pop_front(); |
| 325 | + LastRNWaiting = false; |
| 326 | + } else { |
| 327 | + RN = DelayList.front(); |
| 328 | + DelayList.pop_front(); |
| 329 | + } |
| 330 | + |
| 331 | + Loop *L = getRegionNodeLoop(RN, LI); |
| 332 | + if (!scop->contains(L)) |
| 333 | + L = OuterScopLoop; |
| 334 | + |
| 335 | + Loop *LastLoop = LoopStack.back().L; |
| 336 | + if (LastLoop != L) { |
| 337 | + if (LastLoop && !LastLoop->contains(L)) { |
| 338 | + LastRNWaiting = true; |
| 339 | + DelayList.push_back(RN); |
| 340 | + continue; |
| 341 | + } |
| 342 | + LoopStack.push_back({L, nullptr, 0}); |
| 343 | + } |
| 344 | + buildSchedule(RN, LoopStack); |
| 345 | + } |
| 346 | +} |
| 347 | + |
| 348 | +void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) { |
| 349 | + if (RN->isSubRegion()) { |
| 350 | + auto *LocalRegion = RN->getNodeAs<Region>(); |
| 351 | + if (!scop->isNonAffineSubRegion(LocalRegion)) { |
| 352 | + buildSchedule(LocalRegion, LoopStack); |
| 353 | + return; |
| 354 | + } |
| 355 | + } |
| 356 | + |
| 357 | + assert(LoopStack.rbegin() != LoopStack.rend()); |
| 358 | + auto LoopData = LoopStack.rbegin(); |
| 359 | + LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN); |
| 360 | + |
| 361 | + for (auto *Stmt : scop->getStmtListFor(RN)) { |
| 362 | + isl::union_set UDomain{Stmt->getDomain()}; |
| 363 | + auto StmtSchedule = isl::schedule::from_domain(UDomain); |
| 364 | + LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule); |
| 365 | + } |
| 366 | + |
| 367 | + // Check if we just processed the last node in this loop. If we did, finalize |
| 368 | + // the loop by: |
| 369 | + // |
| 370 | + // - adding new schedule dimensions |
| 371 | + // - folding the resulting schedule into the parent loop schedule |
| 372 | + // - dropping the loop schedule from the LoopStack. |
| 373 | + // |
| 374 | + // Then continue to check surrounding loops, which might also have been |
| 375 | + // completed by this node. |
| 376 | + size_t Dimension = LoopStack.size(); |
| 377 | + while (LoopData->L && |
| 378 | + LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) { |
| 379 | + isl::schedule Schedule = LoopData->Schedule; |
| 380 | + auto NumBlocksProcessed = LoopData->NumBlocksProcessed; |
| 381 | + |
| 382 | + assert(std::next(LoopData) != LoopStack.rend()); |
| 383 | + ++LoopData; |
| 384 | + --Dimension; |
| 385 | + |
| 386 | + if (Schedule) { |
| 387 | + isl::union_set Domain = Schedule.get_domain(); |
| 388 | + isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension); |
| 389 | + Schedule = Schedule.insert_partial_schedule(MUPA); |
| 390 | + LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule); |
| 391 | + } |
| 392 | + |
| 393 | + LoopData->NumBlocksProcessed += NumBlocksProcessed; |
| 394 | + } |
| 395 | + // Now pop all loops that were completed above from the LoopStack. |
| 396 | + LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end()); |
| 397 | +} |
| 398 | + |
225 | 399 | void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
|
226 | 400 | // Check for uses of this instruction outside the scop. Because we do not
|
227 | 401 | // iterate over such instructions and therefore did not "ensure" the existence
|
@@ -2554,7 +2728,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
|
2554 | 2728 | return;
|
2555 | 2729 | }
|
2556 | 2730 |
|
2557 |
| - scop->buildSchedule(LI); |
| 2731 | + buildSchedule(); |
2558 | 2732 |
|
2559 | 2733 | finalizeAccesses();
|
2560 | 2734 |
|
|
0 commit comments