
Commit 63ae1e9

[RISCV] Emit VP strided loads/stores in RISCVGatherScatterLowering (llvm#98111)
RISCVGatherScatterLowering is the last user of riscv_masked_strided_{load,store} after llvm#98131 and llvm#98112, so this patch changes it to emit the VP equivalents instead. This allows us to remove the masked_strided intrinsics so that there is only one lowering path.

riscv_masked_strided_{load,store} didn't have AVL operands and were always VLMAX, so this patch passes the fixed or scalable element count as the EVL instead, which RISCVVectorPeephole should now convert back to VLMAX after llvm#97800. For loads we also emit a vp_select to get the passthru (mask-undisturbed) behaviour.
1 parent f4e8fbc commit 63ae1e9
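To make the new lowering concrete, here is a rough before/after sketch in LLVM IR. It is illustrative only, not taken from the patch or its tests: the value names, the fixed-length `<4 x i32>` type, and the constant EVL of 4 are assumptions; the actual output depends on the recognized stride and vector type.

```llvm
; Before: a masked gather whose pointers form a strided address pattern.
%gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %passthru)

; After: a VP strided load with the element count (4) passed as the EVL,
; followed by a vp.select that restores the passthru (mask-undisturbed)
; semantics of the original gather.
%load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr %base, i64 %stride, <4 x i1> %mask, i32 4)
%res = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %mask, <4 x i32> %load, <4 x i32> %passthru, i32 4)
```

For a scalable vector the EVL is the scalable element count (e.g. 2 × vscale for `<vscale x 2 x i32>`), which RISCVVectorPeephole can fold back to VLMAX.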


7 files changed (+114 −386 lines)


llvm/include/llvm/IR/IntrinsicsRISCV.td

-14
@@ -1710,20 +1710,6 @@ let TargetPrefix = "riscv" in {
     defm vsuxseg # nf : RISCVISegStore<nf>;
   }
 
-  // Strided loads/stores for fixed vectors.
-  def int_riscv_masked_strided_load
-      : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                              [LLVMMatchType<0>, llvm_anyptr_ty,
-                               llvm_anyint_ty,
-                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                              [NoCapture<ArgIndex<1>>, IntrReadMem]>;
-  def int_riscv_masked_strided_store
-      : DefaultAttrsIntrinsic<[],
-                              [llvm_anyvector_ty, llvm_anyptr_ty,
-                               llvm_anyint_ty,
-                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                              [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
-
   // Segment loads/stores for fixed vectors.
   foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
     def int_riscv_seg # nf # _load

llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp

+12-6
@@ -515,17 +515,23 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
 
   Builder.SetInsertPoint(II);
 
+  Value *EVL = Builder.CreateElementCount(
+      IntegerType::get(Ctx, 32), cast<VectorType>(DataType)->getElementCount());
+
   CallInst *Call;
-  if (II->getIntrinsicID() == Intrinsic::masked_gather)
+  if (II->getIntrinsicID() == Intrinsic::masked_gather) {
     Call = Builder.CreateIntrinsic(
-        Intrinsic::riscv_masked_strided_load,
+        Intrinsic::experimental_vp_strided_load,
         {DataType, BasePtr->getType(), Stride->getType()},
-        {II->getArgOperand(3), BasePtr, Stride, II->getArgOperand(2)});
-  else
+        {BasePtr, Stride, II->getArgOperand(2), EVL});
+    Call = Builder.CreateIntrinsic(
+        Intrinsic::vp_select, {DataType},
+        {II->getOperand(2), Call, II->getArgOperand(3), EVL});
+  } else
     Call = Builder.CreateIntrinsic(
-        Intrinsic::riscv_masked_strided_store,
+        Intrinsic::experimental_vp_strided_store,
         {DataType, BasePtr->getType(), Stride->getType()},
-        {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3)});
+        {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3), EVL});
 
   Call->takeName(II);
   II->replaceAllUsesWith(Call);
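Correspondingly, on the store side a strided masked.scatter is now rewritten into a VP strided store. The sketch below is again illustrative (assumed names, a fixed `<4 x i32>` type, and a constant EVL of 4), not output copied from the tests; no vp.select is needed because stores have no passthru operand.

```llvm
; Before: a masked scatter whose pointers form a strided address pattern.
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mask)

; After: a VP strided store; the element count (4) is passed as the EVL,
; which RISCVVectorPeephole should later convert back to VLMAX.
call void @llvm.experimental.vp.strided.store.v4i32.p0.i64(<4 x i32> %val, ptr %base, i64 %stride, <4 x i1> %mask, i32 4)
```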

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

-159
@@ -1622,12 +1622,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                  MachineMemOperand::MOVolatile;
     return true;
-  case Intrinsic::riscv_masked_strided_load:
-    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
-                               /*IsUnitStrided*/ false);
-  case Intrinsic::riscv_masked_strided_store:
-    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
-                               /*IsUnitStrided*/ false);
   case Intrinsic::riscv_seg2_load:
   case Intrinsic::riscv_seg3_load:
   case Intrinsic::riscv_seg4_load:
@@ -9414,81 +9408,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   switch (IntNo) {
   default:
     break;
-  case Intrinsic::riscv_masked_strided_load: {
-    SDLoc DL(Op);
-    MVT XLenVT = Subtarget.getXLenVT();
-
-    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
-    // the selection of the masked intrinsics doesn't do this for us.
-    SDValue Mask = Op.getOperand(5);
-    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
-    MVT VT = Op->getSimpleValueType(0);
-    MVT ContainerVT = VT;
-    if (VT.isFixedLengthVector())
-      ContainerVT = getContainerForFixedLengthVector(VT);
-
-    SDValue PassThru = Op.getOperand(2);
-    if (!IsUnmasked) {
-      MVT MaskVT = getMaskTypeFor(ContainerVT);
-      if (VT.isFixedLengthVector()) {
-        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
-        PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
-      }
-    }
-
-    auto *Load = cast<MemIntrinsicSDNode>(Op);
-    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-    SDValue Ptr = Op.getOperand(3);
-    SDValue Stride = Op.getOperand(4);
-    SDValue Result, Chain;
-
-    // TODO: We restrict this to unmasked loads currently in consideration of
-    // the complexity of handling all falses masks.
-    MVT ScalarVT = ContainerVT.getVectorElementType();
-    if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
-      SDValue ScalarLoad =
-          DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
-                         ScalarVT, Load->getMemOperand());
-      Chain = ScalarLoad.getValue(1);
-      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
-                                Subtarget);
-    } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
-      SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
-                                       Load->getMemOperand());
-      Chain = ScalarLoad.getValue(1);
-      Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
-    } else {
-      SDValue IntID = DAG.getTargetConstant(
-          IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
-          XLenVT);
-
-      SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
-      if (IsUnmasked)
-        Ops.push_back(DAG.getUNDEF(ContainerVT));
-      else
-        Ops.push_back(PassThru);
-      Ops.push_back(Ptr);
-      Ops.push_back(Stride);
-      if (!IsUnmasked)
-        Ops.push_back(Mask);
-      Ops.push_back(VL);
-      if (!IsUnmasked) {
-        SDValue Policy =
-            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
-        Ops.push_back(Policy);
-      }
-
-      SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
-      Result =
-          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
-                                  Load->getMemoryVT(), Load->getMemOperand());
-      Chain = Result.getValue(1);
-    }
-    if (VT.isFixedLengthVector())
-      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-    return DAG.getMergeValues({Result, Chain}, DL);
-  }
   case Intrinsic::riscv_seg2_load:
   case Intrinsic::riscv_seg3_load:
   case Intrinsic::riscv_seg4_load:
@@ -9568,47 +9487,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   switch (IntNo) {
   default:
     break;
-  case Intrinsic::riscv_masked_strided_store: {
-    SDLoc DL(Op);
-    MVT XLenVT = Subtarget.getXLenVT();
-
-    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
-    // the selection of the masked intrinsics doesn't do this for us.
-    SDValue Mask = Op.getOperand(5);
-    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
-    SDValue Val = Op.getOperand(2);
-    MVT VT = Val.getSimpleValueType();
-    MVT ContainerVT = VT;
-    if (VT.isFixedLengthVector()) {
-      ContainerVT = getContainerForFixedLengthVector(VT);
-      Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
-    }
-    if (!IsUnmasked) {
-      MVT MaskVT = getMaskTypeFor(ContainerVT);
-      if (VT.isFixedLengthVector())
-        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
-    }
-
-    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-
-    SDValue IntID = DAG.getTargetConstant(
-        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
-        XLenVT);
-
-    auto *Store = cast<MemIntrinsicSDNode>(Op);
-    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
-    Ops.push_back(Val);
-    Ops.push_back(Op.getOperand(3)); // Ptr
-    Ops.push_back(Op.getOperand(4)); // Stride
-    if (!IsUnmasked)
-      Ops.push_back(Mask);
-    Ops.push_back(VL);
-
-    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
-                                   Ops, Store->getMemoryVT(),
-                                   Store->getMemOperand());
-  }
   case Intrinsic::riscv_seg2_store:
   case Intrinsic::riscv_seg3_store:
   case Intrinsic::riscv_seg4_store:
@@ -17551,43 +17429,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     // By default we do not combine any intrinsic.
     default:
       return SDValue();
-    case Intrinsic::riscv_masked_strided_load: {
-      MVT VT = N->getSimpleValueType(0);
-      auto *Load = cast<MemIntrinsicSDNode>(N);
-      SDValue PassThru = N->getOperand(2);
-      SDValue Base = N->getOperand(3);
-      SDValue Stride = N->getOperand(4);
-      SDValue Mask = N->getOperand(5);
-
-      // If the stride is equal to the element size in bytes, we can use
-      // a masked.load.
-      const unsigned ElementSize = VT.getScalarStoreSize();
-      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
-          StrideC && StrideC->getZExtValue() == ElementSize)
-        return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
-                                 DAG.getUNDEF(XLenVT), Mask, PassThru,
-                                 Load->getMemoryVT(), Load->getMemOperand(),
-                                 ISD::UNINDEXED, ISD::NON_EXTLOAD);
-      return SDValue();
-    }
-    case Intrinsic::riscv_masked_strided_store: {
-      auto *Store = cast<MemIntrinsicSDNode>(N);
-      SDValue Value = N->getOperand(2);
-      SDValue Base = N->getOperand(3);
-      SDValue Stride = N->getOperand(4);
-      SDValue Mask = N->getOperand(5);
-
-      // If the stride is equal to the element size in bytes, we can use
-      // a masked.store.
-      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
-      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
-          StrideC && StrideC->getZExtValue() == ElementSize)
-        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
-                                  DAG.getUNDEF(XLenVT), Mask,
-                                  Value.getValueType(), Store->getMemOperand(),
-                                  ISD::UNINDEXED, false);
-      return SDValue();
-    }
     case Intrinsic::riscv_vcpop:
     case Intrinsic::riscv_vcpop_mask:
     case Intrinsic::riscv_vfirst:

llvm/test/CodeGen/RISCV/pr89833.ll

-16
This file was deleted.
