Skip to content

Commit 972f297

Browse files
authored
Resolve TODO: Use TokenFactor for inline memset (llvm#87002)
The chain returned by the inline memset lowering can be built as a TokenFactor, as is already done for memcpy.
1 parent 70bf139 commit 972f297

File tree

1 file changed

+31
-29
lines changed

1 file changed

+31
-29
lines changed

llvm/lib/Target/X86/X86SelectionDAGInfo.cpp

Lines changed: 31 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -48,26 +48,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
4848
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
4949
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
5050
MachinePointerInfo DstPtrInfo) const {
51-
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
52-
const X86Subtarget &Subtarget =
53-
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
51+
// If to a segment-relative address space, use the default lowering.
52+
if (DstPtrInfo.getAddrSpace() >= 256)
53+
return SDValue();
5454

55-
#ifndef NDEBUG
5655
// If the base register might conflict with our physical registers, bail out.
5756
const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
5857
X86::ECX, X86::EAX, X86::EDI};
59-
assert(!isBaseRegConflictPossible(DAG, ClobberSet));
60-
#endif
61-
62-
// If to a segment-relative address space, use the default lowering.
63-
if (DstPtrInfo.getAddrSpace() >= 256)
58+
if (isBaseRegConflictPossible(DAG, ClobberSet))
6459
return SDValue();
6560

61+
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
62+
const X86Subtarget &Subtarget =
63+
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
64+
6665
// If not DWORD aligned or size is more than the threshold, call the library.
6766
// The libc version is likely to be faster for these cases. It can use the
6867
// address value and run time information about the CPU.
6968
if (Alignment < Align(4) || !ConstantSize ||
70-
ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
69+
ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
7170
return SDValue();
7271

7372
uint64_t SizeVal = ConstantSize->getZExtValue();
@@ -128,26 +127,29 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
128127
InGlue = Chain.getValue(1);
129128

130129
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
131-
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
132-
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
133-
134-
if (BytesLeft) {
135-
// Handle the last 1 - 7 bytes.
136-
unsigned Offset = SizeVal - BytesLeft;
137-
EVT AddrVT = Dst.getValueType();
138-
EVT SizeVT = Size.getValueType();
139-
140-
Chain =
141-
DAG.getMemset(Chain, dl,
142-
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
143-
DAG.getConstant(Offset, dl, AddrVT)),
144-
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
145-
isVolatile, AlwaysInline,
146-
/* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
147-
}
130+
SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
131+
SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
148132

149-
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
150-
return Chain;
133+
/// RepStos can process the whole length.
134+
if (BytesLeft == 0)
135+
return RepStos;
136+
137+
// Handle the last 1 - 7 bytes.
138+
SmallVector<SDValue, 4> Results;
139+
Results.push_back(RepStos);
140+
unsigned Offset = SizeVal - BytesLeft;
141+
EVT AddrVT = Dst.getValueType();
142+
EVT SizeVT = Size.getValueType();
143+
144+
Results.push_back(
145+
DAG.getMemset(Chain, dl,
146+
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
147+
DAG.getConstant(Offset, dl, AddrVT)),
148+
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
149+
isVolatile, AlwaysInline,
150+
/* isTailCall */ false, DstPtrInfo.getWithOffset(Offset)));
151+
152+
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
151153
}
152154

153155
/// Emit a single REP MOVS{B,W,D,Q} instruction.

0 commit comments

Comments
 (0)