Skip to content

Commit cfd594f

Browse files
committed
[SROA] isVectorPromotionViable(): memory intrinsics operate on vectors of bytes (take 3)
* This is a recommit of 3c4d2a0, * which was reverted in 25f01d5, because it exposed a miscompile in PPC backend, which was resolved in https://reviews.llvm.org/D140089 / cb3f415. * which was a recommit of cf624b2, * which was reverted in 5cfc22c, because the cut-off on the number of vector elements was not low enough, and it triggered both SDAG SDNode operand number assertions, and caused compile time explosions in some cases. Let's try with something really *REALLY* conservative first, just to get somewhere, and try to bump it later. FIXME: should this respect TTI reg width * num vec regs? Original commit message: Now, there's a big caveat here - these bytes are abstract bytes, not the i8 we have in LLVM, so strictly speaking this is not exactly legal, see e.g. AliveToolkit/alive2#860 ^ the "bytes" "could" have been a pointer, and loading it as an integer inserts an implicit ptrtoint. But at the same time, InstCombine's `InstCombinerImpl::SimplifyAnyMemTransfer()` would expand a memtransfer of 1/2/4/8 bytes into integer-typed load+store, so this isn't exactly a new problem. Note that in memory, poison is byte-wise, so we really can't widen elements, but SROA seems to be inconsistent here. Fixes #59116.
1 parent 37b8f09 commit cfd594f

File tree

21 files changed

+267
-304
lines changed

21 files changed

+267
-304
lines changed

clang/test/CodeGenOpenCL/amdgpu-nullptr.cl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -515,13 +515,17 @@ typedef struct {
515515
private char *p;
516516
} StructTy3;
517517

518-
// CHECK-LABEL: test_memset_private
519-
// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
520-
// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4
521-
// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]]
522-
// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, i32 36
523-
// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 addrspace(5)*
524-
// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4
518+
// CHECK-LABEL: @test_memset_private(
519+
// CHECK-NEXT: entry:
520+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)*
521+
// CHECK-NEXT: [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)*
522+
// CHECK-NEXT: store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, !tbaa.struct !9
523+
// CHECK-NEXT: [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4
524+
// CHECK-NEXT: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], align 8, !tbaa.struct !12
525+
// CHECK-NEXT: [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP0]], i32 36
526+
// CHECK-NEXT: [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)*
527+
// CHECK-NEXT: store i32 0, i32 addrspace(5)* [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13
528+
// CHECK-NEXT: ret void
525529
void test_memset_private(private StructTy3 *ptr) {
526530
StructTy3 S3 = {0, 0, 0, 0, 0};
527531
*ptr = S3;

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,8 +2029,10 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
20292029
? Ty->getElementType()
20302030
: FixedVectorType::get(Ty->getElementType(), NumElements);
20312031

2032-
Type *SplitIntTy =
2033-
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2032+
Type *SplitIntTy = nullptr;
2033+
if (uint64_t Bitwidth = NumElements * ElementSize * 8;
2034+
Bitwidth <= IntegerType::MAX_INT_BITS)
2035+
SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth);
20342036

20352037
Use *U = S.getUse();
20362038

@@ -2049,7 +2051,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
20492051
// Disable vector promotion when there are loads or stores of an FCA.
20502052
if (LTy->isStructTy())
20512053
return false;
2052-
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2054+
if (SplitIntTy &&
2055+
(P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
20532056
assert(LTy->isIntegerTy());
20542057
LTy = SplitIntTy;
20552058
}
@@ -2062,7 +2065,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
20622065
// Disable vector promotion when there are loads or stores of an FCA.
20632066
if (STy->isStructTy())
20642067
return false;
2065-
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2068+
if (SplitIntTy &&
2069+
(P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
20662070
assert(STy->isIntegerTy());
20672071
STy = SplitIntTy;
20682072
}
@@ -2112,7 +2116,8 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
21122116
/// SSA value. We only can ensure this for a limited set of operations, and we
21132117
/// don't want to do the rewrites unless we are confident that the result will
21142118
/// be promotable, so we have an early test here.
2115-
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2119+
static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
2120+
const DataLayout &DL) {
21162121
// Collect the candidate types for vector-based promotion. Also track whether
21172122
// we have different element types.
21182123
SmallVector<VectorType *, 4> CandidateTys;
@@ -2149,6 +2154,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
21492154
}
21502155
}
21512156
};
2157+
bool SeenMemTransferInst = false;
21522158
// Consider any loads or stores that are the exact size of the slice.
21532159
for (const Slice &S : P)
21542160
if (S.beginOffset() == P.beginOffset() &&
@@ -2157,8 +2163,29 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
21572163
CheckCandidateType(LI->getType());
21582164
else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
21592165
CheckCandidateType(SI->getValueOperand()->getType());
2166+
else if (isa<MemTransferInst>(S.getUse()->getUser()))
2167+
SeenMemTransferInst = true;
21602168
}
21612169

2170+
// If we have seen a mem transfer intrinsic,
2171+
// and the partition is small-enough,
2172+
// enqueue appropriate byte vector.
2173+
//
2174+
// The "small-enough" threshold is somewhat arbitrary,
2175+
// and is mostly dictated by compile-time concerns,
2176+
// but, at the same time, SDAG SDNode can't handle
2177+
// more than 65535 operands, so we should not
2178+
// produce vectors with more than ~32768 elements.
2179+
//
2180+
// Perhaps, we should also take into account the TTI:
2181+
// `getNumberOfRegisters() * getRegisterBitWidth() / 8` ?
2182+
//
2183+
// FIXME: byte type is sticky. If we had any op with byte-typed elements,
2184+
// then we should choose that type.
2185+
if (SeenMemTransferInst && P.size() <= 64)
2186+
CheckCandidateType(
2187+
FixedVectorType::get(IntegerType::getInt8Ty(Ctx), P.size()));
2188+
21622189
// If we didn't find a vector type, nothing to do here.
21632190
if (CandidateTys.empty())
21642191
return nullptr;
@@ -2215,13 +2242,6 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22152242
CandidateTys.resize(1);
22162243
}
22172244

2218-
// FIXME: hack. Do we have a named constant for this?
2219-
// SDAG SDNode can't have more than 65535 operands.
2220-
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2221-
return cast<FixedVectorType>(VTy)->getNumElements() >
2222-
std::numeric_limits<unsigned short>::max();
2223-
});
2224-
22252245
for (VectorType *VTy : CandidateTys)
22262246
if (checkVectorTypeForPromotion(P, VTy, DL))
22272247
return VTy;
@@ -4634,8 +4654,9 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
46344654

46354655
bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
46364656

4637-
VectorType *VecTy =
4638-
IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4657+
VectorType *VecTy = IsIntegerPromotable
4658+
? nullptr
4659+
: isVectorPromotionViable(P, AI.getContext(), DL);
46394660
if (VecTy)
46404661
SliceTy = VecTy;
46414662

llvm/test/CodeGen/AMDGPU/v1024.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,28 @@
44

55
; GCN-LABEL: {{^}}test_v1024:
66
; GCN-NOT: v_accvgpr
7-
; GCN-COUNT-32: v_mov_b32_e32
7+
; GCN-COUNT-10: v_mov_b32_e32
88
; GCN-NOT: v_accvgpr
99
define amdgpu_kernel void @test_v1024() {
1010
entry:
1111
%alloca = alloca <32 x i32>, align 16, addrspace(5)
12-
call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 128, i1 false)
12+
%cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)*
13+
call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %cast, i8 0, i32 128, i1 false)
1314
br i1 undef, label %if.then.i.i, label %if.else.i
1415

1516
if.then.i.i: ; preds = %entry
16-
call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 undef, i64 128, i1 false)
17+
call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 16 %cast, i8 addrspace(5)* align 4 undef, i64 128, i1 false)
1718
br label %if.then.i62.i
1819

1920
if.else.i: ; preds = %entry
2021
br label %if.then.i62.i
2122

2223
if.then.i62.i: ; preds = %if.else.i, %if.then.i.i
23-
call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 undef, ptr addrspace(5) align 16 %alloca, i64 128, i1 false)
24+
call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* align 4 undef, i8 addrspace(5)* align 16 %cast, i64 128, i1 false)
2425
ret void
2526
}
2627

27-
declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture readonly, i8, i32, i1 immarg)
28-
declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg)
28+
declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture readonly, i8, i32, i1 immarg)
29+
declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)
2930

30-
declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg)
31+
declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)

llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,12 @@
2525

2626
;; Split alloca.
2727
; CHECK: entry:
28-
; CHECK-NEXT: %To.sroa.0 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_1:[0-9]+]]
29-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1]], metadata ptr %To.sroa.0, metadata !DIExpression()), !dbg
30-
31-
; CHECK-NEXT: %To.sroa.4 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_3:[0-9]+]]
32-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_3]], metadata ptr %To.sroa.4, metadata !DIExpression()), !dbg
33-
34-
;; Split memcpy.
35-
; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 %To.sroa.0, ptr align 4 @From, i64 12, i1 false),{{.*}}!DIAssignID ![[ID_4:[0-9]+]]
36-
;; This slice has been split and is promoted.
37-
; CHECK: %To.sroa.3.0.copyload = load i32, ptr getelementptr inbounds (i8, ptr @From, i64 12)
38-
; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 %To.sroa.4, ptr align 4 getelementptr inbounds (i8, ptr @From, i64 16), i64 12, i1 false){{.*}}!DIAssignID ![[ID_6:[0-9]+]]
39-
40-
;; Intrinsics for the splits above.
41-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %To.sroa.0, metadata !DIExpression()), !dbg
42-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %To.sroa.3.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
43-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6]], metadata ptr %To.sroa.4, metadata !DIExpression()), !dbg
28+
; CHECK-NEXT: %To.sroa.0.sroa.0.0.copyload = load <12 x i8>, ptr @From, align 4, !dbg
29+
; CHECK-NEXT: %To.sroa.3.0.copyload = load i32, ptr getelementptr inbounds (i8, ptr @From, i64 12), align 4, !dbg
30+
; CHECK-NEXT: %To.sroa.4.sroa.0.0.copyload = load <12 x i8>, ptr getelementptr inbounds (i8, ptr @From, i64 16), align 4, !dbg
31+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <12 x i8> %To.sroa.0.sroa.0.0.copyload, metadata ![[TO:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg
32+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %To.sroa.3.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata ![[ID_3:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg
33+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <12 x i8> %To.sroa.4.sroa.0.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg
4434

4535
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4636

llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
;; __attribute__((nodebug)) int Cond;
77
;; __attribute__((nodebug)) Blob *C;
88
;; __attribute__((nodebug)) void call(int);
9-
;;
9+
;;
1010
;; void f() {
1111
;; int A[16];
1212
;; __attribute__ ((nodebug)) int B[16];
1313
;; // A[0:6) <- Glob
1414
;; __builtin_memmove(&A[0], &Glob, sizeof(Blob));
1515
;; call(0);
1616
;; // B[8:14) <- Glob
17-
;; __builtin_memmove(&B[8], &Glob, sizeof(Blob));
17+
;; __builtin_memmove(&B[8], &Glob, sizeof(Blob));
1818
;; call(A[0]);
1919
;; // A[8:14) <- A[0:6)
2020
;; __builtin_memmove(&A[8], &A[0], sizeof(Blob));
@@ -24,9 +24,9 @@
2424
;; __builtin_memmove(C, &A[8], sizeof(Blob));
2525
;; else
2626
;; // C <- B[8:14)
27-
;; __builtin_memmove(C, &B[8], sizeof(Blob));
27+
;; __builtin_memmove(C, &B[8], sizeof(Blob));
2828
;; }
29-
;;
29+
;;
3030
;; using:
3131
;; clang test.cpp -emit-llvm -S -g -O2 -Xclang -disable-llvm-passes -o - \
3232
;; | opt -passes=declare-to-assign -o test.ll - -S
@@ -38,12 +38,12 @@
3838
;; memcpy. Check that the dbg.assign address and fragment are correct and
3939
;; ensure the DIAssignID still links it to the memmove(/memcpy).
4040

41-
; CHECK: %A.sroa.0.sroa.5 = alloca [5 x i32]
42-
; CHECK: llvm.memcpy{{.*}}(ptr align 4 %A.sroa.0.sroa.5, ptr align 4 getelementptr inbounds (i8, ptr @Glob, i64 4), i64 20, i1 false){{.*}}!DIAssignID ![[ID:[0-9]+]]
41+
; CHECK: %B = alloca [16 x i32]
42+
; CHECK: %A.sroa.0.sroa.5.0.copyload = load <20 x i8>, ptr getelementptr inbounds (i8, ptr @Glob, i64 4), align 4, !dbg
4343
;; Here's the dbg.assign for element 0 - it's not important for the test.
4444
; CHECK-NEXT: llvm.dbg.assign({{.*}}!DIExpression(DW_OP_LLVM_fragment, 0, 32){{.*}})
4545
;; This is the dbg.assign we care about:
46-
; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 160), metadata ![[ID]], metadata ptr %A.sroa.0.sroa.5, metadata !DIExpression())
46+
; CHECK-NEXT: llvm.dbg.assign(metadata <20 x i8> %A.sroa.0.sroa.5.0.copyload, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 160), metadata ![[ID:[0-9]+]], metadata ptr undef, metadata !DIExpression())
4747

4848
; CHECK: ![[VAR]] = !DILocalVariable(name: "A"
4949

llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,13 @@
2626
;; | opt -passes=declare-to-assign -S -o -
2727

2828
; CHECK: entry:
29-
; CHECK-NEXT: %S.sroa.0 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_1:[0-9]+]]
30-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg
29+
; CHECK-NEXT: %agg.tmp = alloca %struct.LargeStruct
30+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <12 x i8> zeroinitializer, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg
3131

32-
; CHECK-NEXT: %S.sroa.6 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_3:[0-9]+]]
33-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_3]], metadata ptr %S.sroa.6, metadata !DIExpression()), !dbg
34-
35-
;; The memset has been split into [0, 96)[96, 128)[128, 224) bit slices. The
36-
;; memset for the middle slice has been removed.
37-
; CHECK: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.0, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_4:[0-9]+]]
38-
; CHECK-NEXT: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.6, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_5:[0-9]+]]
39-
40-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg
4132
;; This is the one we care about most in this test: check that a memset->store
4233
;; gets a correct dbg.assign.
4334
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
44-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_5]], metadata ptr %S.sroa.6, metadata !DIExpression()), !dbg
35+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <12 x i8> zeroinitializer, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_5:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg
4536

4637
;; The load from global+store becomes a load.
4738
;; FIXME: In reality it is actually stored again later on.

llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
;; | V3i point = {0, 0, 0};
2626
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
27-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
28-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
29-
27+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <16 x i8> zeroinitializer, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 128), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
28+
; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %point.sroa.3.16.vecblend, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 128)), !dbg
3029
;; point.z = 5000;
3130
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 5000, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
3231

@@ -35,20 +34,15 @@
3534
;; local.other.x = global.other.x
3635
;; local.other.y = global.other.y
3736
;; local.other.z = global.other.z
38-
; CHECK-NEXT: %other.sroa.0.0.copyload = load i64, ptr @__const._Z3funv.other
39-
; CHECK-NEXT: %other.sroa.4.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 8)
40-
; CHECK-NEXT: %other.sroa.5.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 16)
41-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.0.0.copyload, metadata ![[other:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
42-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.4.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
43-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.5.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
37+
; CHECK-NEXT: %other.sroa.0.0.copyload = load <24 x i8>, ptr @__const._Z3funv.other, align 8, !dbg
38+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <24 x i8> %other.sroa.0.0.copyload, metadata ![[other:[0-9]+]], metadata !DIExpression(), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
4439

4540
;; | std::memcpy(&point.y, &other.x, sizeof(long) * 2);
4641
;; other is now 3 scalars:
4742
;; point.y = other.x
48-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.0.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
49-
;;
5043
;; point.z = other.y
51-
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.4.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
44+
; CHECK-NEXT: %other.sroa.0.0.vec.extract = shufflevector
45+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata <16 x i8> %other.sroa.0.0.vec.extract, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 128), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg
5246

5347
; CHECK: ![[point]] = !DILocalVariable(name: "point",
5448
; CHECK: ![[other]] = !DILocalVariable(name: "other",

0 commit comments

Comments
 (0)