Skip to content

Commit c3bf6d2

Browse files
committed
[X86] Fold PSHUF(VSHIFT(X,Y)) -> VSHIFT(PSHUF(X),Y)
PSHUFD/PSHUFLW/PSHUFHW can act as a vector move / folded load, notably helping simplify pre-AVX cases. This is a much milder alternative to refactoring canonicalizeShuffleWithBinOps to support SSE shift nodes.
1 parent 1eb74f7 commit c3bf6d2

36 files changed

+893
-1002
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42075,10 +42075,37 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
4207542075
}
4207642076
case X86ISD::PSHUFD:
4207742077
case X86ISD::PSHUFLW:
42078-
case X86ISD::PSHUFHW:
42078+
case X86ISD::PSHUFHW: {
42079+
SDValue N0 = N.getOperand(0);
42080+
SDValue N1 = N.getOperand(1);
42081+
if (N0->hasOneUse()) {
42082+
SDValue V = peekThroughOneUseBitcasts(N0);
42083+
switch (V.getOpcode()) {
42084+
case X86ISD::VSHL:
42085+
case X86ISD::VSRL:
42086+
case X86ISD::VSRA:
42087+
case X86ISD::VSHLI:
42088+
case X86ISD::VSRLI:
42089+
case X86ISD::VSRAI:
42090+
case X86ISD::VROTLI:
42091+
case X86ISD::VROTRI: {
42092+
MVT InnerVT = V.getSimpleValueType();
42093+
if (InnerVT.getScalarSizeInBits() <= VT.getScalarSizeInBits()) {
42094+
SDValue Res = DAG.getNode(Opcode, DL, VT,
42095+
DAG.getBitcast(VT, V.getOperand(0)), N1);
42096+
Res = DAG.getBitcast(InnerVT, Res);
42097+
Res = DAG.getNode(V.getOpcode(), DL, InnerVT, Res, V.getOperand(1));
42098+
return DAG.getBitcast(VT, Res);
42099+
}
42100+
break;
42101+
}
42102+
}
42103+
}
42104+
4207942105
Mask = getPSHUFShuffleMask(N);
4208042106
assert(Mask.size() == 4);
4208142107
break;
42108+
}
4208242109
case X86ISD::MOVSD:
4208342110
case X86ISD::MOVSH:
4208442111
case X86ISD::MOVSS: {

llvm/test/CodeGen/X86/abds-vector-128.ll

Lines changed: 21 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -81,44 +81,36 @@ define <16 x i8> @abd_ext_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
8181
; SSE2-NEXT: psubq %xmm10, %xmm7
8282
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm9[2],xmm1[3],xmm9[3]
8383
; SSE2-NEXT: psubq %xmm1, %xmm8
84-
; SSE2-NEXT: movdqa %xmm0, %xmm1
84+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
8585
; SSE2-NEXT: psrad $31, %xmm1
86-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
8786
; SSE2-NEXT: pxor %xmm1, %xmm0
8887
; SSE2-NEXT: psubq %xmm1, %xmm0
89-
; SSE2-NEXT: movdqa %xmm3, %xmm1
88+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
9089
; SSE2-NEXT: psrad $31, %xmm1
91-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
9290
; SSE2-NEXT: pxor %xmm1, %xmm3
9391
; SSE2-NEXT: psubq %xmm1, %xmm3
94-
; SSE2-NEXT: movdqa %xmm4, %xmm1
92+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
9593
; SSE2-NEXT: psrad $31, %xmm1
96-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
9794
; SSE2-NEXT: pxor %xmm1, %xmm4
9895
; SSE2-NEXT: psubq %xmm1, %xmm4
99-
; SSE2-NEXT: movdqa %xmm5, %xmm1
96+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
10097
; SSE2-NEXT: psrad $31, %xmm1
101-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
10298
; SSE2-NEXT: pxor %xmm1, %xmm5
10399
; SSE2-NEXT: psubq %xmm1, %xmm5
104-
; SSE2-NEXT: movdqa %xmm2, %xmm1
100+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
105101
; SSE2-NEXT: psrad $31, %xmm1
106-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
107102
; SSE2-NEXT: pxor %xmm1, %xmm2
108103
; SSE2-NEXT: psubq %xmm1, %xmm2
109-
; SSE2-NEXT: movdqa %xmm6, %xmm1
104+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
110105
; SSE2-NEXT: psrad $31, %xmm1
111-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
112106
; SSE2-NEXT: pxor %xmm1, %xmm6
113107
; SSE2-NEXT: psubq %xmm1, %xmm6
114-
; SSE2-NEXT: movdqa %xmm7, %xmm1
108+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
115109
; SSE2-NEXT: psrad $31, %xmm1
116-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
117110
; SSE2-NEXT: pxor %xmm1, %xmm7
118111
; SSE2-NEXT: psubq %xmm1, %xmm7
119-
; SSE2-NEXT: movdqa %xmm8, %xmm1
112+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3]
120113
; SSE2-NEXT: psrad $31, %xmm1
121-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
122114
; SSE2-NEXT: pxor %xmm1, %xmm8
123115
; SSE2-NEXT: psubq %xmm1, %xmm8
124116
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
@@ -233,44 +225,36 @@ define <16 x i8> @abd_ext_v16i8_undef(<16 x i8> %a, <16 x i8> %b) nounwind {
233225
; SSE2-NEXT: psubq %xmm10, %xmm7
234226
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm9[2],xmm1[3],xmm9[3]
235227
; SSE2-NEXT: psubq %xmm1, %xmm8
236-
; SSE2-NEXT: movdqa %xmm0, %xmm1
228+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
237229
; SSE2-NEXT: psrad $31, %xmm1
238-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
239230
; SSE2-NEXT: pxor %xmm1, %xmm0
240231
; SSE2-NEXT: psubq %xmm1, %xmm0
241-
; SSE2-NEXT: movdqa %xmm3, %xmm1
232+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
242233
; SSE2-NEXT: psrad $31, %xmm1
243-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
244234
; SSE2-NEXT: pxor %xmm1, %xmm3
245235
; SSE2-NEXT: psubq %xmm1, %xmm3
246-
; SSE2-NEXT: movdqa %xmm4, %xmm1
236+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
247237
; SSE2-NEXT: psrad $31, %xmm1
248-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
249238
; SSE2-NEXT: pxor %xmm1, %xmm4
250239
; SSE2-NEXT: psubq %xmm1, %xmm4
251-
; SSE2-NEXT: movdqa %xmm5, %xmm1
240+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
252241
; SSE2-NEXT: psrad $31, %xmm1
253-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
254242
; SSE2-NEXT: pxor %xmm1, %xmm5
255243
; SSE2-NEXT: psubq %xmm1, %xmm5
256-
; SSE2-NEXT: movdqa %xmm2, %xmm1
244+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
257245
; SSE2-NEXT: psrad $31, %xmm1
258-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
259246
; SSE2-NEXT: pxor %xmm1, %xmm2
260247
; SSE2-NEXT: psubq %xmm1, %xmm2
261-
; SSE2-NEXT: movdqa %xmm6, %xmm1
248+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
262249
; SSE2-NEXT: psrad $31, %xmm1
263-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
264250
; SSE2-NEXT: pxor %xmm1, %xmm6
265251
; SSE2-NEXT: psubq %xmm1, %xmm6
266-
; SSE2-NEXT: movdqa %xmm7, %xmm1
252+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
267253
; SSE2-NEXT: psrad $31, %xmm1
268-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
269254
; SSE2-NEXT: pxor %xmm1, %xmm7
270255
; SSE2-NEXT: psubq %xmm1, %xmm7
271-
; SSE2-NEXT: movdqa %xmm8, %xmm1
256+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3]
272257
; SSE2-NEXT: psrad $31, %xmm1
273-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
274258
; SSE2-NEXT: pxor %xmm1, %xmm8
275259
; SSE2-NEXT: psubq %xmm1, %xmm8
276260
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
@@ -378,14 +362,12 @@ define <4 x i32> @abd_ext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
378362
; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
379363
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
380364
; SSE2-NEXT: psubq %xmm5, %xmm2
381-
; SSE2-NEXT: movdqa %xmm0, %xmm1
365+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
382366
; SSE2-NEXT: psrad $31, %xmm1
383-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
384367
; SSE2-NEXT: pxor %xmm1, %xmm0
385368
; SSE2-NEXT: psubq %xmm1, %xmm0
386-
; SSE2-NEXT: movdqa %xmm2, %xmm1
369+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
387370
; SSE2-NEXT: psrad $31, %xmm1
388-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
389371
; SSE2-NEXT: pxor %xmm1, %xmm2
390372
; SSE2-NEXT: psubq %xmm1, %xmm2
391373
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
@@ -432,14 +414,12 @@ define <4 x i32> @abd_ext_v4i32_undef(<4 x i32> %a, <4 x i32> %b) nounwind {
432414
; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
433415
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
434416
; SSE2-NEXT: psubq %xmm5, %xmm2
435-
; SSE2-NEXT: movdqa %xmm0, %xmm1
417+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
436418
; SSE2-NEXT: psrad $31, %xmm1
437-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
438419
; SSE2-NEXT: pxor %xmm1, %xmm0
439420
; SSE2-NEXT: psubq %xmm1, %xmm0
440-
; SSE2-NEXT: movdqa %xmm2, %xmm1
421+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
441422
; SSE2-NEXT: psrad $31, %xmm1
442-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
443423
; SSE2-NEXT: pxor %xmm1, %xmm2
444424
; SSE2-NEXT: psubq %xmm1, %xmm2
445425
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
@@ -1023,9 +1003,8 @@ define <2 x i64> @abd_subnsw_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
10231003
; SSE2-LABEL: abd_subnsw_v2i64:
10241004
; SSE2: # %bb.0:
10251005
; SSE2-NEXT: psubq %xmm1, %xmm0
1026-
; SSE2-NEXT: movdqa %xmm0, %xmm1
1006+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
10271007
; SSE2-NEXT: psrad $31, %xmm1
1028-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
10291008
; SSE2-NEXT: pxor %xmm1, %xmm0
10301009
; SSE2-NEXT: psubq %xmm1, %xmm0
10311010
; SSE2-NEXT: retq

llvm/test/CodeGen/X86/abdu-vector-128.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -81,24 +81,20 @@ define <8 x i16> @abd_ext_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
8181
; SSE2-NEXT: psubq %xmm6, %xmm4
8282
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
8383
; SSE2-NEXT: psubq %xmm1, %xmm0
84-
; SSE2-NEXT: movdqa %xmm3, %xmm1
84+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
8585
; SSE2-NEXT: psrad $31, %xmm1
86-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
8786
; SSE2-NEXT: pxor %xmm1, %xmm3
8887
; SSE2-NEXT: psubq %xmm1, %xmm3
89-
; SSE2-NEXT: movdqa %xmm2, %xmm1
88+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
9089
; SSE2-NEXT: psrad $31, %xmm1
91-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
9290
; SSE2-NEXT: pxor %xmm1, %xmm2
9391
; SSE2-NEXT: psubq %xmm1, %xmm2
94-
; SSE2-NEXT: movdqa %xmm4, %xmm1
92+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
9593
; SSE2-NEXT: psrad $31, %xmm1
96-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
9794
; SSE2-NEXT: pxor %xmm1, %xmm4
9895
; SSE2-NEXT: psubq %xmm1, %xmm4
99-
; SSE2-NEXT: movdqa %xmm0, %xmm1
96+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
10097
; SSE2-NEXT: psrad $31, %xmm1
101-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
10298
; SSE2-NEXT: pxor %xmm1, %xmm0
10399
; SSE2-NEXT: psubq %xmm1, %xmm0
104100
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -162,24 +158,20 @@ define <8 x i16> @abd_ext_v8i16_undef(<8 x i16> %a, <8 x i16> %b) nounwind {
162158
; SSE2-NEXT: psubq %xmm6, %xmm4
163159
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
164160
; SSE2-NEXT: psubq %xmm1, %xmm0
165-
; SSE2-NEXT: movdqa %xmm3, %xmm1
161+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
166162
; SSE2-NEXT: psrad $31, %xmm1
167-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
168163
; SSE2-NEXT: pxor %xmm1, %xmm3
169164
; SSE2-NEXT: psubq %xmm1, %xmm3
170-
; SSE2-NEXT: movdqa %xmm2, %xmm1
165+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
171166
; SSE2-NEXT: psrad $31, %xmm1
172-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
173167
; SSE2-NEXT: pxor %xmm1, %xmm2
174168
; SSE2-NEXT: psubq %xmm1, %xmm2
175-
; SSE2-NEXT: movdqa %xmm4, %xmm1
169+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
176170
; SSE2-NEXT: psrad $31, %xmm1
177-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
178171
; SSE2-NEXT: pxor %xmm1, %xmm4
179172
; SSE2-NEXT: psubq %xmm1, %xmm4
180-
; SSE2-NEXT: movdqa %xmm0, %xmm1
173+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
181174
; SSE2-NEXT: psrad $31, %xmm1
182-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
183175
; SSE2-NEXT: pxor %xmm1, %xmm0
184176
; SSE2-NEXT: psubq %xmm1, %xmm0
185177
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -229,14 +221,12 @@ define <4 x i32> @abd_ext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
229221
; SSE2-NEXT: psubq %xmm4, %xmm3
230222
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
231223
; SSE2-NEXT: psubq %xmm1, %xmm0
232-
; SSE2-NEXT: movdqa %xmm3, %xmm1
224+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
233225
; SSE2-NEXT: psrad $31, %xmm1
234-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
235226
; SSE2-NEXT: pxor %xmm1, %xmm3
236227
; SSE2-NEXT: psubq %xmm1, %xmm3
237-
; SSE2-NEXT: movdqa %xmm0, %xmm1
228+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
238229
; SSE2-NEXT: psrad $31, %xmm1
239-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
240230
; SSE2-NEXT: pxor %xmm1, %xmm0
241231
; SSE2-NEXT: psubq %xmm1, %xmm0
242232
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
@@ -276,14 +266,12 @@ define <4 x i32> @abd_ext_v4i32_undef(<4 x i32> %a, <4 x i32> %b) nounwind {
276266
; SSE2-NEXT: psubq %xmm4, %xmm3
277267
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
278268
; SSE2-NEXT: psubq %xmm1, %xmm0
279-
; SSE2-NEXT: movdqa %xmm3, %xmm1
269+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
280270
; SSE2-NEXT: psrad $31, %xmm1
281-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
282271
; SSE2-NEXT: pxor %xmm1, %xmm3
283272
; SSE2-NEXT: psubq %xmm1, %xmm3
284-
; SSE2-NEXT: movdqa %xmm0, %xmm1
273+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
285274
; SSE2-NEXT: psrad $31, %xmm1
286-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
287275
; SSE2-NEXT: pxor %xmm1, %xmm0
288276
; SSE2-NEXT: psubq %xmm1, %xmm0
289277
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]

llvm/test/CodeGen/X86/avx512-cmp.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ define <8 x i32> @legalize_loop(<8 x double> %arg) {
191191
; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
192192
; KNL-NEXT: vcmpnltpd %zmm0, %zmm1, %k1
193193
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
194-
; KNL-NEXT: vpsrld $31, %ymm0, %ymm1
195-
; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
194+
; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[3,2,1,0,7,6,5,4]
195+
; KNL-NEXT: vpsrld $31, %ymm1, %ymm1
196196
; KNL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
197197
; KNL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
198198
; KNL-NEXT: retq
@@ -202,8 +202,8 @@ define <8 x i32> @legalize_loop(<8 x double> %arg) {
202202
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
203203
; SKX-NEXT: vcmpnltpd %zmm0, %zmm1, %k0
204204
; SKX-NEXT: vpmovm2d %k0, %ymm0
205-
; SKX-NEXT: vpsrld $31, %ymm0, %ymm1
206-
; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
205+
; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[3,2,1,0,7,6,5,4]
206+
; SKX-NEXT: vpsrld $31, %ymm1, %ymm1
207207
; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
208208
; SKX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
209209
; SKX-NEXT: retq

llvm/test/CodeGen/X86/combine-abs.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,12 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
107107
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
108108
; SSE2-LABEL: combine_v4i64_abs_abs:
109109
; SSE2: # %bb.0:
110-
; SSE2-NEXT: movdqa %xmm0, %xmm2
110+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
111111
; SSE2-NEXT: psrad $31, %xmm2
112-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
113112
; SSE2-NEXT: pxor %xmm2, %xmm0
114113
; SSE2-NEXT: psubq %xmm2, %xmm0
115-
; SSE2-NEXT: movdqa %xmm1, %xmm2
114+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
116115
; SSE2-NEXT: psrad $31, %xmm2
117-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
118116
; SSE2-NEXT: pxor %xmm2, %xmm1
119117
; SSE2-NEXT: psubq %xmm2, %xmm1
120118
; SSE2-NEXT: retq

llvm/test/CodeGen/X86/combine-mul.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,9 +325,8 @@ define <16 x i8> @combine_mul_to_abs_v16i8(<16 x i8> %x) {
325325
define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) {
326326
; SSE-LABEL: combine_mul_to_abs_v2i64:
327327
; SSE: # %bb.0:
328-
; SSE-NEXT: movdqa %xmm0, %xmm1
328+
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
329329
; SSE-NEXT: psrad $31, %xmm1
330-
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
331330
; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
332331
; SSE-NEXT: movdqa %xmm0, %xmm2
333332
; SSE-NEXT: psrlq $32, %xmm2

llvm/test/CodeGen/X86/combine-ptest.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,9 @@ define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
265265
; SSE41-LABEL: ptestz_v2i64_signbits:
266266
; SSE41: # %bb.0:
267267
; SSE41-NEXT: movl %edi, %eax
268-
; SSE41-NEXT: psrad $31, %xmm0
269268
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
270-
; SSE41-NEXT: ptest %xmm0, %xmm0
269+
; SSE41-NEXT: movmskps %xmm0, %ecx
270+
; SSE41-NEXT: testl %ecx, %ecx
271271
; SSE41-NEXT: cmovnel %esi, %eax
272272
; SSE41-NEXT: retq
273273
;

0 commit comments

Comments
 (0)