@@ -48,26 +48,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
48
48
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
49
49
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
50
50
MachinePointerInfo DstPtrInfo) const {
51
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
52
- const X86Subtarget &Subtarget =
53
- DAG. getMachineFunction (). getSubtarget <X86Subtarget> ();
51
+ // If to a segment-relative address space, use the default lowering.
52
+ if (DstPtrInfo. getAddrSpace () >= 256 )
53
+ return SDValue ();
54
54
55
- #ifndef NDEBUG
56
55
// If the base register might conflict with our physical registers, bail out.
57
56
const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
58
57
X86::ECX, X86::EAX, X86::EDI};
59
- assert (!isBaseRegConflictPossible (DAG, ClobberSet));
60
- #endif
61
-
62
- // If to a segment-relative address space, use the default lowering.
63
- if (DstPtrInfo.getAddrSpace () >= 256 )
58
+ if (isBaseRegConflictPossible (DAG, ClobberSet))
64
59
return SDValue ();
65
60
61
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
62
+ const X86Subtarget &Subtarget =
63
+ DAG.getMachineFunction ().getSubtarget <X86Subtarget>();
64
+
66
65
// If not DWORD aligned or size is more than the threshold, call the library.
67
66
// The libc version is likely to be faster for these cases. It can use the
68
67
// address value and run time information about the CPU.
69
68
if (Alignment < Align (4 ) || !ConstantSize ||
70
- ConstantSize->getZExtValue () > Subtarget.getMaxInlineSizeThreshold ())
69
+ ConstantSize->getZExtValue () > Subtarget.getMaxInlineSizeThreshold ())
71
70
return SDValue ();
72
71
73
72
uint64_t SizeVal = ConstantSize->getZExtValue ();
@@ -128,26 +127,29 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
128
127
InGlue = Chain.getValue (1 );
129
128
130
129
SDVTList Tys = DAG.getVTList (MVT::Other, MVT::Glue);
131
- SDValue Ops[] = { Chain, DAG.getValueType (AVT), InGlue };
132
- Chain = DAG.getNode (X86ISD::REP_STOS, dl, Tys, Ops);
133
-
134
- if (BytesLeft) {
135
- // Handle the last 1 - 7 bytes.
136
- unsigned Offset = SizeVal - BytesLeft;
137
- EVT AddrVT = Dst.getValueType ();
138
- EVT SizeVT = Size.getValueType ();
139
-
140
- Chain =
141
- DAG.getMemset (Chain, dl,
142
- DAG.getNode (ISD::ADD, dl, AddrVT, Dst,
143
- DAG.getConstant (Offset, dl, AddrVT)),
144
- Val, DAG.getConstant (BytesLeft, dl, SizeVT), Alignment,
145
- isVolatile, AlwaysInline,
146
- /* isTailCall */ false , DstPtrInfo.getWithOffset (Offset));
147
- }
130
+ SDValue Ops[] = {Chain, DAG.getValueType (AVT), InGlue};
131
+ SDValue RepStos = DAG.getNode (X86ISD::REP_STOS, dl, Tys, Ops);
148
132
149
- // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
150
- return Chain;
133
+ /// RepStos can process the whole length.
134
+ if (BytesLeft == 0 )
135
+ return RepStos;
136
+
137
+ // Handle the last 1 - 7 bytes.
138
+ SmallVector<SDValue, 4 > Results;
139
+ Results.push_back (RepStos);
140
+ unsigned Offset = SizeVal - BytesLeft;
141
+ EVT AddrVT = Dst.getValueType ();
142
+ EVT SizeVT = Size.getValueType ();
143
+
144
+ Results.push_back (
145
+ DAG.getMemset (Chain, dl,
146
+ DAG.getNode (ISD::ADD, dl, AddrVT, Dst,
147
+ DAG.getConstant (Offset, dl, AddrVT)),
148
+ Val, DAG.getConstant (BytesLeft, dl, SizeVT), Alignment,
149
+ isVolatile, AlwaysInline,
150
+ /* isTailCall */ false , DstPtrInfo.getWithOffset (Offset)));
151
+
152
+ return DAG.getNode (ISD::TokenFactor, dl, MVT::Other, Results);
151
153
}
152
154
153
155
/// Emit a single REP MOVS{B,W,D,Q} instruction.
0 commit comments