@@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
-STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
 
 namespace {
 
@@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
     return true;
   }
 
-  // If this is a load-store pair from a stack slot to a stack slot, we
-  // might be able to perform the stack-move optimization just as we do for
-  // memcpys from an alloca to an alloca.
-  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
-    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
-      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
-                                DL.getTypeStoreSize(T), BAA)) {
-        // Avoid invalidating the iterator.
-        BBI = SI->getNextNonDebugInstruction()->getIterator();
-        eraseInstruction(SI);
-        eraseInstruction(LI);
-        ++NumMemCpyInstr;
-        return true;
-      }
-    }
-  }
-
   return false;
 }
 
@@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }
 
-// Attempts to optimize the pattern whereby memory is copied from an alloca to
-// another alloca, where the two allocas don't have conflicting mod/ref. If
-// successful, the two allocas can be merged into one and the transfer can be
-// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
-// move operations in that language.
-//
-// Once we determine that the optimization is safe to perform, we replace all
-// uses of the destination alloca with the source alloca. We also "shrink wrap"
-// the lifetime markers of the single merged alloca to before the first use
-// and after the last use. Note that the "shrink wrapping" procedure is a safe
-// transformation only because we restrict the scope of this optimization to
-// allocas that aren't captured.
-bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
-                                          AllocaInst *DestAlloca,
-                                          AllocaInst *SrcAlloca, uint64_t Size,
-                                          BatchAAResults &BAA) {
-  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
-                    << *Store << "\n");
-
-  // Make sure the two allocas are in the same address space.
-  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
-    return false;
-  }
-
-  // 1. Check that copy is full. Calculate the static size of the allocas to be
-  // merged, bail out if we can't.
-  const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
-  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
-  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
-    return false;
-  }
-  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
-  if (!DestSize || DestSize->isScalable() ||
-      Size != DestSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
-    return false;
-  }
-
-  // 2-1. Check that src and dest are static allocas, which are not affected by
-  // stacksave/stackrestore.
-  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
-      SrcAlloca->getParent() != Load->getParent() ||
-      SrcAlloca->getParent() != Store->getParent())
-    return false;
-
-  // 2-2. Check that src and dest are never captured, unescaped allocas. Also
-  // collect lifetime markers first/last users in order to shrink wrap the
-  // lifetimes, and instructions with noalias metadata to remove them.
-
-  SmallVector<Instruction *, 4> LifetimeMarkers;
-  Instruction *FirstUser = nullptr, *LastUser = nullptr;
-  SmallSet<Instruction *, 4> NoAliasInstrs;
-
-  // Recursively track the user and check whether modified alias exist.
-  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
-    bool CanBeNull, CanBeFreed;
-    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
-  };
-
-  auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
-    SmallVector<Instruction *, 8> Worklist;
-    Worklist.push_back(AI);
-    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
-    Worklist.reserve(MaxUsesToExplore);
-    SmallSet<const Use *, 20> Visited;
-    while (!Worklist.empty()) {
-      Instruction *I = Worklist.back();
-      Worklist.pop_back();
-      for (const Use &U : I->uses()) {
-        if (Visited.size() >= MaxUsesToExplore) {
-          LLVM_DEBUG(
-              dbgs()
-              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
-          return false;
-        }
-        if (!Visited.insert(&U).second)
-          continue;
-        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
-        case UseCaptureKind::MAY_CAPTURE:
-          return false;
-        case UseCaptureKind::PASSTHROUGH:
-          // Instructions cannot have non-instruction users.
-          Worklist.push_back(cast<Instruction>(U.getUser()));
-          continue;
-        case UseCaptureKind::NO_CAPTURE: {
-          auto *UI = cast<Instruction>(U.getUser());
-          if (DestAlloca->getParent() != UI->getParent())
-            return false;
-          if (!FirstUser || UI->comesBefore(FirstUser))
-            FirstUser = UI;
-          if (!LastUser || LastUser->comesBefore(UI))
-            LastUser = UI;
-          if (UI->isLifetimeStartOrEnd()) {
-            // We note the locations of these intrinsic calls so that we can
-            // delete them later if the optimization succeeds, this is safe
-            // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
-            // conceptually fill all the bytes of the alloca with an undefined
-            // value.
-            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
-            if (Size < 0 || Size == DestSize) {
-              LifetimeMarkers.push_back(UI);
-              continue;
-            }
-          }
-          if (UI->hasMetadata(LLVMContext::MD_noalias))
-            NoAliasInstrs.insert(UI);
-          if (!ModRefCallback(UI))
-            return false;
-        }
-        }
-      }
-    }
-    return true;
-  };
-
-  // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
-  // from the alloca to the Store.
-  ModRefInfo DestModRef = ModRefInfo::NoModRef;
-  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
-  auto DestModRefCallback = [&](Instruction *UI) -> bool {
-    // We don't care about the store itself.
-    if (UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
-    // FIXME: For multi-BB cases, we need to see reachability from it to
-    // store.
-    // Bailout if Dest may have any ModRef before Store.
-    if (UI->comesBefore(Store) && isModOrRefSet(Res))
-      return false;
-    DestModRef |= BAA.getModRefInfo(UI, DestLoc);
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
-    return false;
-
-  // 3. Check that, from after the Load to the end of the BB,
-  // 3-1. if the dest has any Mod, src has no Ref, and
-  // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
-  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
-
-  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
-    // Any ModRef before Load doesn't matter, also Load and Store can be
-    // ignored.
-    if (UI->comesBefore(Load) || UI == Load || UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
-    if ((isModSet(DestModRef) && isRefSet(Res)) ||
-        (isRefSet(DestModRef) && isModSet(Res)))
-      return false;
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
-    return false;
-
-  // We can do the transformation. First, align the allocas appropriately.
-  SrcAlloca->setAlignment(
-      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
-
-  // Merge the two allocas.
-  DestAlloca->replaceAllUsesWith(SrcAlloca);
-  eraseInstruction(DestAlloca);
-
-  // Drop metadata on the source alloca.
-  SrcAlloca->dropUnknownNonDebugMetadata();
-
-  // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exist.
-  if (!LifetimeMarkers.empty()) {
-    LLVMContext &C = SrcAlloca->getContext();
-    IRBuilder<> Builder(C);
-
-    ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
-    // Create a new lifetime start marker before the first user of src or
-    // alloca users.
-    Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
-    Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
-
-    // Create a new lifetime end marker after the last user of src or alloca
-    // users.
-    // FIXME: If the last user is the terminator for the bb, we can insert
-    // lifetime.end marker to the immediate post-dominator, but currently do
-    // nothing.
-    if (!LastUser->isTerminator()) {
-      Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
-      Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
-    }
-
-    // Remove all other lifetime markers.
-    for (Instruction *I : LifetimeMarkers)
-      eraseInstruction(I);
-  }
-
-  // As this transformation can cause memory accesses that didn't previously
-  // alias to begin to alias one another, we remove !noalias metadata from any
-  // uses of either alloca. This is conservative, but more precision doesn't
-  // seem worthwhile right now.
-  for (Instruction *I : NoAliasInstrs)
-    I->setMetadata(LLVMContext::MD_noalias, nullptr);
-
-  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
-  NumStackMove++;
-  return true;
-}
-
 /// Perform simplification of memcpy's.  If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
       AnyClobber, MemoryLocation::getForSource(M), BAA);
 
-  // There are five possible optimizations we can do for memcpy:
+  // There are four possible optimizations we can do for memcpy:
   // a) memcpy-memcpy xform which exposes redundance for DSE.
   // b) call-memcpy xform for return slot optimization.
   // c) memcpy from freshly alloca'd space or space that has just started
   //    its lifetime copies undefined data, and we can therefore eliminate
   //    the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
-  // e) elimination of memcpy via stack-move optimization.
   if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
     if (Instruction *MI = MD->getMemoryInst()) {
       if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         }
       }
       if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-        if (processMemCpyMemCpyDependence(M, MDep, BAA))
-          return true;
+        return processMemCpyMemCpyDependence(M, MDep, BAA);
       if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
         if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
           LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     }
   }
 
-  // If the transfer is from a stack slot to a stack slot, then we may be able
-  // to perform the stack-move optimization. See the comments in
-  // performStackMoveOptzn() for more details.
-  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
-  if (!DestAlloca)
-    return false;
-  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
-  if (!SrcAlloca)
-    return false;
-  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
-  if (Len == nullptr)
-    return false;
-  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
-                            BAA)) {
-    // Avoid invalidating the iterator.
-    BBI = M->getNextNonDebugInstruction()->getIterator();
-    eraseInstruction(M);
-    ++NumMemCpyInstr;
-    return true;
-  }
-
   return false;
 }
 
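For context on the pattern the reverted code targeted: the removed comment above notes that alloca-to-alloca copies are generated frequently by Rust moves. Below is a minimal, illustrative Rust sketch, not part of this patch, with invented struct and function names. Passing a large value by move typically lowers (before optimization) to two stack slots plus a memcpy between them, which is the kind of memcpy performStackMoveOptzn() tried to eliminate.

// Illustrative only: a large struct moved by value.
struct Big {
    bytes: [u8; 4096],
}

#[inline(never)]
fn consume(b: Big) -> u8 {
    b.bytes[0]
}

fn main() {
    let big = Big { bytes: [42u8; 4096] };
    // Without the stack-move optimization, the unoptimized IR for this move
    // usually contains an alloca for `big`, a second alloca backing the
    // argument temporary, and a memcpy from one to the other.
    let first = consume(big);
    println!("{first}");
}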