Skip to content

Commit 4cf8b29

Browse files
authored
[AMDGPU][PromoteAlloca] Correctly handle a variable vector index (llvm#83597)
The promote-alloca-to-vector transformation assumes that the vector index is a constant value. If it is not a constant, then either an assertion fails or the transformation generates an incorrect index.
1 parent 4ce737b commit 4cf8b29

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -401,14 +401,16 @@ static Value *promoteAllocaUserToVector(
401401
// We're loading the full vector.
402402
Type *AccessTy = Inst->getType();
403403
TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
404-
if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
405-
if (AccessTy->isPtrOrPtrVectorTy())
406-
CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
407-
else if (CurVal->getType()->isPtrOrPtrVectorTy())
408-
CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
409-
Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
410-
Inst->replaceAllUsesWith(NewVal);
411-
return nullptr;
404+
if (Constant *CI = dyn_cast<Constant>(Index)) {
405+
if (CI->isZeroValue() && AccessSize == VecStoreSize) {
406+
if (AccessTy->isPtrOrPtrVectorTy())
407+
CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
408+
else if (CurVal->getType()->isPtrOrPtrVectorTy())
409+
CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
410+
Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
411+
Inst->replaceAllUsesWith(NewVal);
412+
return nullptr;
413+
}
412414
}
413415

414416
// Loading a subvector.
@@ -456,12 +458,14 @@ static Value *promoteAllocaUserToVector(
456458
// We're storing the full vector, we can handle this without knowing CurVal.
457459
Type *AccessTy = Val->getType();
458460
TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
459-
if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
460-
if (AccessTy->isPtrOrPtrVectorTy())
461-
Val = CreateTempPtrIntCast(Val, AccessTy);
462-
else if (VectorTy->isPtrOrPtrVectorTy())
463-
Val = CreateTempPtrIntCast(Val, VectorTy);
464-
return Builder.CreateBitOrPointerCast(Val, VectorTy);
461+
if (Constant *CI = dyn_cast<Constant>(Index)) {
462+
if (CI->isZeroValue() && AccessSize == VecStoreSize) {
463+
if (AccessTy->isPtrOrPtrVectorTy())
464+
Val = CreateTempPtrIntCast(Val, AccessTy);
465+
else if (VectorTy->isPtrOrPtrVectorTy())
466+
Val = CreateTempPtrIntCast(Val, VectorTy);
467+
return Builder.CreateBitOrPointerCast(Val, VectorTy);
468+
}
465469
}
466470

467471
// Storing a subvector.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3+
4+
; Check that promoting an alloca to a vector form works correctly when a variable
5+
; vector index is used.
6+
7+
define amdgpu_kernel void @non_constant_index(i32 %arg) {
8+
; CHECK-LABEL: define amdgpu_kernel void @non_constant_index(
9+
; CHECK-SAME: i32 [[ARG:%.*]]) {
10+
; CHECK-NEXT: bb:
11+
; CHECK-NEXT: br label [[BB1:%.*]]
12+
; CHECK: bb1:
13+
; CHECK-NEXT: br label [[BB1]]
14+
; CHECK: bb2:
15+
; CHECK-NEXT: br label [[BB3:%.*]]
16+
; CHECK: bb3:
17+
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <2 x float> [ [[TMP2:%.*]], [[BB3]] ], [ undef, [[BB2:%.*]] ]
18+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> [[PROMOTEALLOCA]], float 0.000000e+00, i32 [[ARG]]
19+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ARG]], 1
20+
; CHECK-NEXT: [[TMP2]] = insertelement <2 x float> [[TMP0]], float 0.000000e+00, i32 [[TMP1]]
21+
; CHECK-NEXT: br label [[BB3]]
22+
;
23+
bb:
24+
%i = alloca [2 x float], align 4, addrspace(5)
25+
br label %bb1
26+
27+
bb1:
28+
br label %bb1
29+
30+
bb2:
31+
br label %bb3
32+
33+
bb3:
34+
%i4 = getelementptr float, ptr addrspace(5) %i, i32 %arg
35+
store <2 x float> zeroinitializer, ptr addrspace(5) %i4, align 8
36+
br label %bb3
37+
}

0 commit comments

Comments
 (0)