Skip to content

Commit 99ed472

Browse files
vitalybuka
authored and tru committed
Revert "Reapply: [MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas"""
Breaks Asan and LTO. This reverts commit ea72b51. (cherry picked from commit 0065388)
1 parent 8ff2643 commit 99ed472

File tree

4 files changed

+114
-299
lines changed

4 files changed

+114
-299
lines changed

llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
namespace llvm {
2121

2222
class AAResults;
23-
class AllocaInst;
2423
class BatchAAResults;
2524
class AssumptionCache;
2625
class CallBase;
@@ -78,9 +77,6 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
7877
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
7978
Value *ByteVal);
8079
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
81-
bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
82-
AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
83-
uint64_t Size, BatchAAResults &BAA);
8480

8581
void eraseInstruction(Instruction *I);
8682
bool iterateOnFunction(Function &F);

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 2 additions & 254 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
6969
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
7070
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
7171
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
72-
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
7372

7473
namespace {
7574

@@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
731730
return true;
732731
}
733732

734-
// If this is a load-store pair from a stack slot to a stack slot, we
735-
// might be able to perform the stack-move optimization just as we do for
736-
// memcpys from an alloca to an alloca.
737-
if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
738-
if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
739-
if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
740-
DL.getTypeStoreSize(T), BAA)) {
741-
// Avoid invalidating the iterator.
742-
BBI = SI->getNextNonDebugInstruction()->getIterator();
743-
eraseInstruction(SI);
744-
eraseInstruction(LI);
745-
++NumMemCpyInstr;
746-
return true;
747-
}
748-
}
749-
}
750-
751733
return false;
752734
}
753735

@@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14261408
return true;
14271409
}
14281410

1429-
// Attempts to optimize the pattern whereby memory is copied from an alloca to
1430-
// another alloca, where the two allocas don't have conflicting mod/ref. If
1431-
// successful, the two allocas can be merged into one and the transfer can be
1432-
// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1433-
// move operations in that language.
1434-
//
1435-
// Once we determine that the optimization is safe to perform, we replace all
1436-
// uses of the destination alloca with the source alloca. We also "shrink wrap"
1437-
// the lifetime markers of the single merged alloca to before the first use
1438-
// and after the last use. Note that the "shrink wrapping" procedure is a safe
1439-
// transformation only because we restrict the scope of this optimization to
1440-
// allocas that aren't captured.
1441-
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1442-
AllocaInst *DestAlloca,
1443-
AllocaInst *SrcAlloca, uint64_t Size,
1444-
BatchAAResults &BAA) {
1445-
LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1446-
<< *Store << "\n");
1447-
1448-
// Make sure the two allocas are in the same address space.
1449-
if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1450-
LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1451-
return false;
1452-
}
1453-
1454-
// 1. Check that copy is full. Calculate the static size of the allocas to be
1455-
// merged, bail out if we can't.
1456-
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
1457-
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1458-
if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
1459-
LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1460-
return false;
1461-
}
1462-
std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1463-
if (!DestSize || DestSize->isScalable() ||
1464-
Size != DestSize->getFixedValue()) {
1465-
LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1466-
return false;
1467-
}
1468-
1469-
// 2-1. Check that src and dest are static allocas, which are not affected by
1470-
// stacksave/stackrestore.
1471-
if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
1472-
SrcAlloca->getParent() != Load->getParent() ||
1473-
SrcAlloca->getParent() != Store->getParent())
1474-
return false;
1475-
1476-
// 2-2. Check that src and dest are never captured, unescaped allocas. Also
1477-
// collect lifetime markers first/last users in order to shrink wrap the
1478-
// lifetimes, and instructions with noalias metadata to remove them.
1479-
1480-
SmallVector<Instruction *, 4> LifetimeMarkers;
1481-
Instruction *FirstUser = nullptr, *LastUser = nullptr;
1482-
SmallSet<Instruction *, 4> NoAliasInstrs;
1483-
1484-
// Recursively track the user and check whether modified alias exist.
1485-
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1486-
bool CanBeNull, CanBeFreed;
1487-
return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1488-
};
1489-
1490-
auto CaptureTrackingWithModRef =
1491-
[&](Instruction *AI,
1492-
function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1493-
SmallVector<Instruction *, 8> Worklist;
1494-
Worklist.push_back(AI);
1495-
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1496-
Worklist.reserve(MaxUsesToExplore);
1497-
SmallSet<const Use *, 20> Visited;
1498-
while (!Worklist.empty()) {
1499-
Instruction *I = Worklist.back();
1500-
Worklist.pop_back();
1501-
for (const Use &U : I->uses()) {
1502-
if (Visited.size() >= MaxUsesToExplore) {
1503-
LLVM_DEBUG(
1504-
dbgs()
1505-
<< "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1506-
return false;
1507-
}
1508-
if (!Visited.insert(&U).second)
1509-
continue;
1510-
switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1511-
case UseCaptureKind::MAY_CAPTURE:
1512-
return false;
1513-
case UseCaptureKind::PASSTHROUGH:
1514-
// Instructions cannot have non-instruction users.
1515-
Worklist.push_back(cast<Instruction>(U.getUser()));
1516-
continue;
1517-
case UseCaptureKind::NO_CAPTURE: {
1518-
auto *UI = cast<Instruction>(U.getUser());
1519-
if (DestAlloca->getParent() != UI->getParent())
1520-
return false;
1521-
if (!FirstUser || UI->comesBefore(FirstUser))
1522-
FirstUser = UI;
1523-
if (!LastUser || LastUser->comesBefore(UI))
1524-
LastUser = UI;
1525-
if (UI->isLifetimeStartOrEnd()) {
1526-
// We note the locations of these intrinsic calls so that we can
1527-
// delete them later if the optimization succeeds, this is safe
1528-
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1529-
// conceptually fill all the bytes of the alloca with an undefined
1530-
// value.
1531-
int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1532-
if (Size < 0 || Size == DestSize) {
1533-
LifetimeMarkers.push_back(UI);
1534-
continue;
1535-
}
1536-
}
1537-
if (UI->hasMetadata(LLVMContext::MD_noalias))
1538-
NoAliasInstrs.insert(UI);
1539-
if (!ModRefCallback(UI))
1540-
return false;
1541-
}
1542-
}
1543-
}
1544-
}
1545-
return true;
1546-
};
1547-
1548-
// 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1549-
// from the alloca to the Store.
1550-
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1551-
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1552-
auto DestModRefCallback = [&](Instruction *UI) -> bool {
1553-
// We don't care about the store itself.
1554-
if (UI == Store)
1555-
return true;
1556-
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1557-
// FIXME: For multi-BB cases, we need to see reachability from it to
1558-
// store.
1559-
// Bailout if Dest may have any ModRef before Store.
1560-
if (UI->comesBefore(Store) && isModOrRefSet(Res))
1561-
return false;
1562-
DestModRef |= BAA.getModRefInfo(UI, DestLoc);
1563-
1564-
return true;
1565-
};
1566-
1567-
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1568-
return false;
1569-
1570-
// 3. Check that, from after the Load to the end of the BB,
1571-
// 3-1. if the dest has any Mod, src has no Ref, and
1572-
// 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
1573-
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1574-
1575-
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1576-
// Any ModRef before Load doesn't matter, also Load and Store can be
1577-
// ignored.
1578-
if (UI->comesBefore(Load) || UI == Load || UI == Store)
1579-
return true;
1580-
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1581-
if ((isModSet(DestModRef) && isRefSet(Res)) ||
1582-
(isRefSet(DestModRef) && isModSet(Res)))
1583-
return false;
1584-
1585-
return true;
1586-
};
1587-
1588-
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1589-
return false;
1590-
1591-
// We can do the transformation. First, align the allocas appropriately.
1592-
SrcAlloca->setAlignment(
1593-
std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1594-
1595-
// Merge the two allocas.
1596-
DestAlloca->replaceAllUsesWith(SrcAlloca);
1597-
eraseInstruction(DestAlloca);
1598-
1599-
// Drop metadata on the source alloca.
1600-
SrcAlloca->dropUnknownNonDebugMetadata();
1601-
1602-
// Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
1603-
if (!LifetimeMarkers.empty()) {
1604-
LLVMContext &C = SrcAlloca->getContext();
1605-
IRBuilder<> Builder(C);
1606-
1607-
ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
1608-
// Create a new lifetime start marker before the first user of src or alloca
1609-
// users.
1610-
Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
1611-
Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1612-
1613-
// Create a new lifetime end marker after the last user of src or alloca
1614-
// users.
1615-
// FIXME: If the last user is the terminator for the bb, we can insert
1616-
// lifetime.end marker to the immidiate post-dominator, but currently do
1617-
// nothing.
1618-
if (!LastUser->isTerminator()) {
1619-
Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
1620-
Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1621-
}
1622-
1623-
// Remove all other lifetime markers.
1624-
for (Instruction *I : LifetimeMarkers)
1625-
eraseInstruction(I);
1626-
}
1627-
1628-
// As this transformation can cause memory accesses that didn't previously
1629-
// alias to begin to alias one another, we remove !noalias metadata from any
1630-
// uses of either alloca. This is conservative, but more precision doesn't
1631-
// seem worthwhile right now.
1632-
for (Instruction *I : NoAliasInstrs)
1633-
I->setMetadata(LLVMContext::MD_noalias, nullptr);
1634-
1635-
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
1636-
NumStackMove++;
1637-
return true;
1638-
}
1639-
16401411
/// Perform simplification of memcpy's. If we have memcpy A
16411412
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
16421413
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
16931464
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
16941465
AnyClobber, MemoryLocation::getForSource(M), BAA);
16951466

1696-
// There are five possible optimizations we can do for memcpy:
1467+
// There are four possible optimizations we can do for memcpy:
16971468
// a) memcpy-memcpy xform which exposes redundance for DSE.
16981469
// b) call-memcpy xform for return slot optimization.
16991470
// c) memcpy from freshly alloca'd space or space that has just started
17001471
// its lifetime copies undefined data, and we can therefore eliminate
17011472
// the memcpy in favor of the data that was already at the destination.
17021473
// d) memcpy from a just-memset'd source can be turned into memset.
1703-
// e) elimination of memcpy via stack-move optimization.
17041474
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
17051475
if (Instruction *MI = MD->getMemoryInst()) {
17061476
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
17191489
}
17201490
}
17211491
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1722-
if (processMemCpyMemCpyDependence(M, MDep, BAA))
1723-
return true;
1492+
return processMemCpyMemCpyDependence(M, MDep, BAA);
17241493
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
17251494
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
17261495
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
17391508
}
17401509
}
17411510

1742-
// If the transfer is from a stack slot to a stack slot, then we may be able
1743-
// to perform the stack-move optimization. See the comments in
1744-
// performStackMoveOptzn() for more details.
1745-
auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1746-
if (!DestAlloca)
1747-
return false;
1748-
auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1749-
if (!SrcAlloca)
1750-
return false;
1751-
ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1752-
if (Len == nullptr)
1753-
return false;
1754-
if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
1755-
BAA)) {
1756-
// Avoid invalidating the iterator.
1757-
BBI = M->getNextNonDebugInstruction()->getIterator();
1758-
eraseInstruction(M);
1759-
++NumMemCpyInstr;
1760-
return true;
1761-
}
1762-
17631511
return false;
17641512
}
17651513

llvm/test/Transforms/MemCpyOpt/callslot.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,11 @@ define void @write_dest_between_call_and_memcpy() {
5656

5757
define void @write_src_between_call_and_memcpy() {
5858
; CHECK-LABEL: @write_src_between_call_and_memcpy(
59+
; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
5960
; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
6061
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
6162
; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1
63+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false)
6264
; CHECK-NEXT: ret void
6365
;
6466
%dest = alloca [16 x i8]

0 commit comments

Comments
 (0)