@@ -234,6 +234,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
234
234
case Intrinsic::memset_element_unordered_atomic:
235
235
case Intrinsic::init_trampoline:
236
236
case Intrinsic::lifetime_end:
237
+ case Intrinsic::masked_store:
237
238
return true ;
238
239
}
239
240
}
@@ -257,8 +258,8 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
257
258
// / Return a Location stored to by the specified instruction. If isRemovable
258
259
// / returns true, this function and getLocForRead completely describe the memory
259
260
// / operations for this instruction.
260
- static MemoryLocation getLocForWrite (Instruction *Inst) {
261
-
261
+ static MemoryLocation getLocForWrite (Instruction *Inst,
262
+ const TargetLibraryInfo &TLI) {
262
263
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
263
264
return MemoryLocation::get (SI);
264
265
@@ -274,6 +275,8 @@ static MemoryLocation getLocForWrite(Instruction *Inst) {
274
275
return MemoryLocation (); // Unhandled intrinsic.
275
276
case Intrinsic::init_trampoline:
276
277
return MemoryLocation (II->getArgOperand (0 ));
278
+ case Intrinsic::masked_store:
279
+ return MemoryLocation::getForArgument (II, 1 , TLI);
277
280
case Intrinsic::lifetime_end: {
278
281
uint64_t Len = cast<ConstantInt>(II->getArgOperand (0 ))->getZExtValue ();
279
282
return MemoryLocation (II->getArgOperand (1 ), Len);
@@ -325,6 +328,7 @@ static bool isRemovable(Instruction *I) {
325
328
case Intrinsic::memcpy_element_unordered_atomic:
326
329
case Intrinsic::memmove_element_unordered_atomic:
327
330
case Intrinsic::memset_element_unordered_atomic:
331
+ case Intrinsic::masked_store:
328
332
return true ;
329
333
}
330
334
}
@@ -370,9 +374,10 @@ static bool isShortenableAtTheBeginning(Instruction *I) {
370
374
}
371
375
372
376
// / Return the pointer that is being written to.
373
- static Value *getStoredPointerOperand (Instruction *I) {
377
+ static Value *getStoredPointerOperand (Instruction *I,
378
+ const TargetLibraryInfo &TLI) {
374
379
// TODO: factor this to reuse getLocForWrite
375
- MemoryLocation Loc = getLocForWrite (I);
380
+ MemoryLocation Loc = getLocForWrite (I, TLI );
376
381
assert (Loc.Ptr &&
377
382
" unable to find pointer written for analyzable instruction?" );
378
383
// TODO: most APIs don't expect const Value *
@@ -487,6 +492,24 @@ isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier,
487
492
return OW_MaybePartial;
488
493
}
489
494
495
+ static OverwriteResult isMaskedStoreOverwrite (Instruction *Later,
496
+ Instruction *Earlier) {
497
+ auto *IIL = dyn_cast<IntrinsicInst>(Later);
498
+ auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
499
+ if (IIL == nullptr || IIE == nullptr )
500
+ return OW_Unknown;
501
+ if (IIL->getIntrinsicID () != Intrinsic::masked_store ||
502
+ IIE->getIntrinsicID () != Intrinsic::masked_store)
503
+ return OW_Unknown;
504
+ // Pointers.
505
+ if (IIL->getArgOperand (1 ) != IIE->getArgOperand (1 ))
506
+ return OW_Unknown;
507
+ // Masks.
508
+ if (IIL->getArgOperand (3 ) != IIE->getArgOperand (3 ))
509
+ return OW_Unknown;
510
+ return OW_Complete;
511
+ }
512
+
490
513
// / Return 'OW_Complete' if a store to the 'Later' location completely
491
514
// / overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
492
515
// / 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
@@ -796,7 +819,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
796
819
break ;
797
820
798
821
Value *DepPointer =
799
- getUnderlyingObject (getStoredPointerOperand (Dependency));
822
+ getUnderlyingObject (getStoredPointerOperand (Dependency, *TLI ));
800
823
801
824
// Check for aliasing.
802
825
if (!AA->isMustAlias (F->getArgOperand (0 ), DepPointer))
@@ -902,7 +925,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
902
925
if (hasAnalyzableMemoryWrite (&*BBI, *TLI) && isRemovable (&*BBI)) {
903
926
// See through pointer-to-pointer bitcasts
904
927
SmallVector<const Value *, 4 > Pointers;
905
- getUnderlyingObjects (getStoredPointerOperand (&*BBI), Pointers);
928
+ getUnderlyingObjects (getStoredPointerOperand (&*BBI, *TLI ), Pointers);
906
929
907
930
// Stores to stack values are valid candidates for removal.
908
931
bool AllDead = true ;
@@ -1119,11 +1142,12 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
1119
1142
}
1120
1143
1121
1144
static bool removePartiallyOverlappedStores (const DataLayout &DL,
1122
- InstOverlapIntervalsTy &IOL) {
1145
+ InstOverlapIntervalsTy &IOL,
1146
+ const TargetLibraryInfo &TLI) {
1123
1147
bool Changed = false ;
1124
1148
for (auto OI : IOL) {
1125
1149
Instruction *EarlierWrite = OI.first ;
1126
- MemoryLocation Loc = getLocForWrite (EarlierWrite);
1150
+ MemoryLocation Loc = getLocForWrite (EarlierWrite, TLI );
1127
1151
assert (isRemovable (EarlierWrite) && " Expect only removable instruction" );
1128
1152
1129
1153
const Value *Ptr = Loc.Ptr ->stripPointerCasts ();
@@ -1284,7 +1308,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
1284
1308
continue ;
1285
1309
1286
1310
// Figure out what location is being stored to.
1287
- MemoryLocation Loc = getLocForWrite (Inst);
1311
+ MemoryLocation Loc = getLocForWrite (Inst, *TLI );
1288
1312
1289
1313
// If we didn't get a useful location, fail.
1290
1314
if (!Loc.Ptr )
@@ -1308,7 +1332,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
1308
1332
Instruction *DepWrite = InstDep.getInst ();
1309
1333
if (!hasAnalyzableMemoryWrite (DepWrite, *TLI))
1310
1334
break ;
1311
- MemoryLocation DepLoc = getLocForWrite (DepWrite);
1335
+ MemoryLocation DepLoc = getLocForWrite (DepWrite, *TLI );
1312
1336
// If we didn't get a useful location, or if it isn't a size, bail out.
1313
1337
if (!DepLoc.Ptr )
1314
1338
break ;
@@ -1352,6 +1376,11 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
1352
1376
int64_t InstWriteOffset, DepWriteOffset;
1353
1377
OverwriteResult OR = isOverwrite (Loc, DepLoc, DL, *TLI, DepWriteOffset,
1354
1378
InstWriteOffset, *AA, BB.getParent ());
1379
+ if (OR == OW_Unknown) {
1380
+ // isOverwrite punts on MemoryLocations with an imprecise size, such
1381
+ // as masked stores. Handle this here, somewhat inelegantly.
1382
+ OR = isMaskedStoreOverwrite (Inst, DepWrite);
1383
+ }
1355
1384
if (OR == OW_MaybePartial)
1356
1385
OR = isPartialOverwrite (Loc, DepLoc, DepWriteOffset, InstWriteOffset,
1357
1386
DepWrite, IOL);
@@ -1433,7 +1462,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
1433
1462
}
1434
1463
1435
1464
if (EnablePartialOverwriteTracking)
1436
- MadeChange |= removePartiallyOverlappedStores (DL, IOL);
1465
+ MadeChange |= removePartiallyOverlappedStores (DL, IOL, *TLI );
1437
1466
1438
1467
// If this block ends in a return, unwind, or unreachable, all allocas are
1439
1468
// dead at its end, which means stores to them are also dead.
@@ -2494,7 +2523,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
2494
2523
2495
2524
if (EnablePartialOverwriteTracking)
2496
2525
for (auto &KV : State.IOLs )
2497
- MadeChange |= removePartiallyOverlappedStores (State.DL , KV.second );
2526
+ MadeChange |= removePartiallyOverlappedStores (State.DL , KV.second , TLI );
2498
2527
2499
2528
MadeChange |= State.eliminateDeadWritesAtEndOfFunction ();
2500
2529
return MadeChange;
0 commit comments