Skip to content

Commit 39a41c8

Browse files
committed
[CGCall][RISCV] Handle function calls with parameter of RVV tuple type
This was an oversight in D146872, where function calls with tuple types were not covered. This commit fixes this. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D157953
1 parent 0f481f2 commit 39a41c8

File tree

3 files changed

+106
-22
lines changed

3 files changed

+106
-22
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5279,30 +5279,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52795279
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
52805280
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
52815281
llvm::Type *SrcTy = Src.getElementType();
5282-
uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
5283-
uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
5284-
5285-
// If the source type is smaller than the destination type of the
5286-
// coerce-to logic, copy the source value into a temp alloca the size
5287-
// of the destination type to allow loading all of it. The bits past
5288-
// the source value are left undef.
5289-
if (SrcSize < DstSize) {
5290-
Address TempAlloca
5291-
= CreateTempAlloca(STy, Src.getAlignment(),
5292-
Src.getName() + ".coerce");
5293-
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5294-
Src = TempAlloca;
5282+
llvm::TypeSize SrcTypeSize =
5283+
CGM.getDataLayout().getTypeAllocSize(SrcTy);
5284+
llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
5285+
if (SrcTypeSize.isScalable()) {
5286+
assert(STy->containsHomogeneousScalableVectorTypes() &&
5287+
"ABI only supports structure with homogeneous scalable vector "
5288+
"type");
5289+
assert(SrcTypeSize == DstTypeSize &&
5290+
"Only allow non-fractional movement of structure with "
5291+
"homogeneous scalable vector type");
5292+
assert(NumIRArgs == STy->getNumElements());
5293+
5294+
llvm::Value *StoredStructValue =
5295+
Builder.CreateLoad(Src, Src.getName() + ".tuple");
5296+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5297+
llvm::Value *Extract = Builder.CreateExtractValue(
5298+
StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
5299+
IRCallArgs[FirstIRArg + i] = Extract;
5300+
}
52955301
} else {
5296-
Src = Src.withElementType(STy);
5297-
}
5302+
uint64_t SrcSize = SrcTypeSize.getFixedValue();
5303+
uint64_t DstSize = DstTypeSize.getFixedValue();
5304+
5305+
// If the source type is smaller than the destination type of the
5306+
// coerce-to logic, copy the source value into a temp alloca the size
5307+
// of the destination type to allow loading all of it. The bits past
5308+
// the source value are left undef.
5309+
if (SrcSize < DstSize) {
5310+
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
5311+
Src.getName() + ".coerce");
5312+
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5313+
Src = TempAlloca;
5314+
} else {
5315+
Src = Src.withElementType(STy);
5316+
}
52985317

5299-
assert(NumIRArgs == STy->getNumElements());
5300-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5301-
Address EltPtr = Builder.CreateStructGEP(Src, i);
5302-
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5303-
if (ArgHasMaybeUndefAttr)
5304-
LI = Builder.CreateFreeze(LI);
5305-
IRCallArgs[FirstIRArg + i] = LI;
5318+
assert(NumIRArgs == STy->getNumElements());
5319+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5320+
Address EltPtr = Builder.CreateStructGEP(Src, i);
5321+
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5322+
if (ArgHasMaybeUndefAttr)
5323+
LI = Builder.CreateFreeze(LI);
5324+
IRCallArgs[FirstIRArg + i] = LI;
5325+
}
53065326
}
53075327
} else {
53085328
// In the simple case, just pass the coerced loaded value.

clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,36 @@ void baz(__rvv_int32m1x2_t v_tuple) {
9090
// Identity function over an RVV tuple: returns its __rvv_int32m1x2_t argument
// unchanged. Serves as both a tuple-return test and as the callee for quux()
// below, exercising the coerced { <vscale x 2 x i32>, <vscale x 2 x i32> }
// parameter/return lowering.
__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
  return v_tuple;
}
93+
94+
// quux passes an RVV tuple *as an argument* to another function — the exact
// case D157953 fixes in CGCall (the tuple is loaded whole and each member is
// extractvalue'd into a separate IR argument, rather than going through the
// fixed-size memcpy coercion path, which is invalid for scalable types).
// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// O0-NEXT: entry:
// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[COERCE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// O0-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
// O0-NEXT: [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
// O0-NEXT: [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], ptr [[COERCE]], align 4
// O0-NEXT: [[COERCE_TUPLE:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[COERCE]], align 4
// O0-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 0
// O0-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 1
// O0-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
// O0-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
//
// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// AFTER_MEM2REG-NEXT: entry:
// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
// AFTER_MEM2REG-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
// AFTER_MEM2REG-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
//
__rvv_int32m1x2_t quux(__rvv_int32m1x2_t v_tuple) {
  return qux(v_tuple);
}

llvm/test/Transforms/SROA/scalable-vector-struct.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,34 @@ define %struct.test @alloca(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) {
2020
%val = load %struct.test, %struct.test* %addr, align 4
2121
ret %struct.test %val
2222
}
23+
24+
25+
; Regression test added with D157953: SROA must fold the alloca/store/load
; round-trips of a homogeneous scalable-vector struct (an RVV tuple) down to
; the bare insertvalue/extractvalue/call sequence checked below.
define { <vscale x 2 x i32>, <vscale x 2 x i32> } @return_tuple(<vscale x 2 x i32> %v_tuple.coerce0, <vscale x 2 x i32> %v_tuple.coerce1) {
; CHECK-LABEL: @return_tuple(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]], 1
; CHECK-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
; CHECK-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
; CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
; CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
;
entry:
  %v_tuple = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
  %v_tuple.addr = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
  %coerce = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
  %0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %v_tuple.coerce0, 0
  %1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %0, <vscale x 2 x i32> %v_tuple.coerce1, 1
  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %1, ptr %v_tuple, align 4
  %v_tuple1 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple, align 4
  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %v_tuple1, ptr %v_tuple.addr, align 4
  %2 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple.addr, align 4
  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %2, ptr %coerce, align 4
  %coerce.tuple = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %coerce, align 4
  %coerce.extract0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 0
  %coerce.extract1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 1
  %call = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> %coerce.extract0, <vscale x 2 x i32> %coerce.extract1)
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %call
}
52+
53+
declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32>, <vscale x 2 x i32>)

0 commit comments

Comments
 (0)