@@ -10,12 +10,11 @@ define <8 x i16> @cmp_ne_load_const(ptr %x) nounwind {
10
10
; SSE-LABEL: cmp_ne_load_const:
11
11
; SSE: # %bb.0:
12
12
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
13
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
13
14
; SSE-NEXT: pxor %xmm1, %xmm1
14
15
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
15
16
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
16
17
; SSE-NEXT: pxor %xmm1, %xmm0
17
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
18
- ; SSE-NEXT: psraw $8, %xmm0
19
18
; SSE-NEXT: retq
20
19
;
21
20
; AVX-LABEL: cmp_ne_load_const:
@@ -36,12 +35,11 @@ define <8 x i16> @cmp_ne_load_const_volatile(ptr %x) nounwind {
36
35
; SSE-LABEL: cmp_ne_load_const_volatile:
37
36
; SSE: # %bb.0:
38
37
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
38
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
39
39
; SSE-NEXT: pxor %xmm1, %xmm1
40
40
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
41
41
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
42
42
; SSE-NEXT: pxor %xmm1, %xmm0
43
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
44
- ; SSE-NEXT: psraw $8, %xmm0
45
43
; SSE-NEXT: retq
46
44
;
47
45
; AVX2-LABEL: cmp_ne_load_const_volatile:
@@ -75,15 +73,15 @@ define <8 x i16> @cmp_ne_load_const_extra_use1(ptr %x) nounwind {
75
73
; SSE-LABEL: cmp_ne_load_const_extra_use1:
76
74
; SSE: # %bb.0:
77
75
; SSE-NEXT: subq $24, %rsp
78
- ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
79
- ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
76
+ ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
77
+ ; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
80
78
; SSE-NEXT: callq use_v8i8@PLT
81
- ; SSE-NEXT: pxor %xmm0, %xmm0
82
- ; SSE-NEXT: pcmpeqb (%rsp), %xmm0 # 16-byte Folded Reload
83
- ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
84
- ; SSE-NEXT: pxor %xmm0, %xmm1
85
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
86
- ; SSE-NEXT: psraw $8, %xmm0
79
+ ; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
80
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
81
+ ; SSE-NEXT: pxor %xmm1, %xmm1
82
+ ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
83
+ ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
84
+ ; SSE-NEXT: pxor %xmm1, %xmm0
87
85
; SSE-NEXT: addq $24, %rsp
88
86
; SSE-NEXT: retq
89
87
;
@@ -135,9 +133,8 @@ define <8 x i16> @cmp_ne_load_const_extra_use2(ptr %x) nounwind {
135
133
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
136
134
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
137
135
; SSE-NEXT: callq use_v8i1@PLT
138
- ; SSE-NEXT: punpcklbw (%rsp), %xmm0 # 16-byte Folded Reload
139
- ; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
140
- ; SSE-NEXT: psraw $8, %xmm0
136
+ ; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
137
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
141
138
; SSE-NEXT: addq $24, %rsp
142
139
; SSE-NEXT: retq
143
140
;
@@ -183,12 +180,11 @@ define <8 x i16> @cmp_ne_no_load_const(i64 %x) nounwind {
183
180
; SSE-LABEL: cmp_ne_no_load_const:
184
181
; SSE: # %bb.0:
185
182
; SSE-NEXT: movq %rdi, %xmm0
183
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
186
184
; SSE-NEXT: pxor %xmm1, %xmm1
187
185
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
188
186
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
189
187
; SSE-NEXT: pxor %xmm1, %xmm0
190
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
191
- ; SSE-NEXT: psraw $8, %xmm0
192
188
; SSE-NEXT: retq
193
189
;
194
190
; AVX2-LABEL: cmp_ne_no_load_const:
@@ -223,11 +219,10 @@ define <4 x i32> @cmp_ult_load_const(ptr %x) nounwind {
223
219
; SSE-NEXT: movdqa {{.*#+}} xmm1 = <42,214,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
224
220
; SSE-NEXT: pmaxub %xmm0, %xmm1
225
221
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
222
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
223
+ ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
226
224
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
227
225
; SSE-NEXT: pxor %xmm1, %xmm0
228
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
229
- ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
230
- ; SSE-NEXT: psrad $24, %xmm0
231
226
; SSE-NEXT: retq
232
227
;
233
228
; AVX-LABEL: cmp_ult_load_const:
@@ -251,11 +246,10 @@ define <3 x i32> @cmp_ult_load_const_bad_type(ptr %x) nounwind {
251
246
; SSE-NEXT: movdqa {{.*#+}} xmm1 = <42,214,0,u,u,u,u,u,u,u,u,u,u,u,u,u>
252
247
; SSE-NEXT: pmaxub %xmm0, %xmm1
253
248
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
249
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
250
+ ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
254
251
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
255
252
; SSE-NEXT: pxor %xmm1, %xmm0
256
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
257
- ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
258
- ; SSE-NEXT: psrad $24, %xmm0
259
253
; SSE-NEXT: retq
260
254
;
261
255
; AVX2-LABEL: cmp_ult_load_const_bad_type:
@@ -288,12 +282,11 @@ define <3 x i32> @cmp_ult_load_const_bad_type(ptr %x) nounwind {
288
282
define <4 x i32> @cmp_slt_load_const(ptr %x) nounwind {
289
283
; SSE-LABEL: cmp_slt_load_const:
290
284
; SSE: # %bb.0:
291
- ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
292
- ; SSE-NEXT: movdqa {{.*#+}} xmm1 = <42,214,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
293
- ; SSE-NEXT: pcmpgtb %xmm0, %xmm1
294
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
285
+ ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
286
+ ; SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,214,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
287
+ ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
288
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
295
289
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
296
- ; SSE-NEXT: psrad $24, %xmm0
297
290
; SSE-NEXT: retq
298
291
;
299
292
; AVX-LABEL: cmp_slt_load_const:
@@ -314,11 +307,9 @@ define <2 x i64> @cmp_ne_zextload(ptr %x, ptr %y) nounwind {
314
307
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
315
308
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
316
309
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
310
+ ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
317
311
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
318
312
; SSE-NEXT: pxor %xmm1, %xmm0
319
- ; SSE-NEXT: pxor %xmm1, %xmm1
320
- ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
321
- ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
322
313
; SSE-NEXT: retq
323
314
;
324
315
; AVX2-LABEL: cmp_ne_zextload:
@@ -353,10 +344,9 @@ define <8 x i16> @cmp_ugt_zextload(ptr %x, ptr %y) nounwind {
353
344
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
354
345
; SSE-NEXT: pminub %xmm0, %xmm1
355
346
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
347
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
356
348
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
357
349
; SSE-NEXT: pxor %xmm1, %xmm0
358
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
359
- ; SSE-NEXT: psraw $8, %xmm0
360
350
; SSE-NEXT: retq
361
351
;
362
352
; AVX-LABEL: cmp_ugt_zextload:
@@ -381,7 +371,6 @@ define <8 x i16> @cmp_sgt_zextload(ptr %x, ptr %y) nounwind {
381
371
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
382
372
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
383
373
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
384
- ; SSE-NEXT: psraw $8, %xmm0
385
374
; SSE-NEXT: retq
386
375
;
387
376
; AVX-LABEL: cmp_sgt_zextload:
@@ -407,10 +396,9 @@ define <8 x i32> @cmp_ne_zextload_from_legal_op(ptr %x, ptr %y) {
407
396
; SSE-NEXT: pcmpeqw (%rsi), %xmm0
408
397
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
409
398
; SSE-NEXT: pxor %xmm0, %xmm1
399
+ ; SSE-NEXT: movdqa %xmm1, %xmm0
410
400
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
411
- ; SSE-NEXT: psrad $16, %xmm0
412
401
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
413
- ; SSE-NEXT: psrad $16, %xmm1
414
402
; SSE-NEXT: retq
415
403
;
416
404
; AVX2-LABEL: cmp_ne_zextload_from_legal_op:
@@ -448,16 +436,15 @@ define <8 x i32> @PR50055(ptr %src, ptr %dst) nounwind {
448
436
; SSE-NEXT: movdqa %xmm1, %xmm0
449
437
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
450
438
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
439
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
451
440
; SSE-NEXT: pcmpeqb %xmm3, %xmm2
452
441
; SSE-NEXT: pcmpeqd %xmm3, %xmm3
453
442
; SSE-NEXT: pxor %xmm2, %xmm3
454
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
455
- ; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
456
- ; SSE-NEXT: psrad $24, %xmm3
457
- ; SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
458
- ; SSE-NEXT: psrad $24, %xmm2
459
- ; SSE-NEXT: movdqa %xmm2, 16(%rsi)
460
- ; SSE-NEXT: movdqa %xmm3, (%rsi)
443
+ ; SSE-NEXT: movdqa %xmm3, %xmm2
444
+ ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
445
+ ; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
446
+ ; SSE-NEXT: movdqa %xmm3, 16(%rsi)
447
+ ; SSE-NEXT: movdqa %xmm2, (%rsi)
461
448
; SSE-NEXT: retq
462
449
;
463
450
; AVX-LABEL: PR50055:
@@ -484,12 +471,11 @@ define <8 x i16> @multi_use_narrower_size(ptr %src, ptr %dst) nounwind {
484
471
; SSE-NEXT: pxor %xmm2, %xmm2
485
472
; SSE-NEXT: movdqa %xmm1, %xmm0
486
473
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
487
- ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
488
474
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
475
+ ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
476
+ ; SSE-NEXT: movdqa %xmm1, %xmm2
489
477
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
490
- ; SSE-NEXT: psrad $24, %xmm2
491
478
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
492
- ; SSE-NEXT: psrad $24, %xmm1
493
479
; SSE-NEXT: movdqa %xmm1, 16(%rsi)
494
480
; SSE-NEXT: movdqa %xmm2, (%rsi)
495
481
; SSE-NEXT: retq
@@ -524,9 +510,8 @@ define <8 x i32> @multi_use_wider_size(ptr %src, ptr %dst) nounwind {
524
510
; SSE-NEXT: movdqa %xmm1, %xmm0
525
511
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
526
512
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
527
- ; SSE-NEXT: pcmpeqb %xmm3, %xmm2
528
513
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
529
- ; SSE-NEXT: psraw $8, %xmm2
514
+ ; SSE-NEXT: pcmpeqb %xmm3, %xmm2
530
515
; SSE-NEXT: movdqa %xmm2, (%rsi)
531
516
; SSE-NEXT: retq
532
517
;
@@ -556,13 +541,12 @@ define <4 x i64> @PR50055_signed(ptr %src, ptr %dst) {
556
541
; SSE-NEXT: psrad $24, %xmm0
557
542
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
558
543
; SSE-NEXT: psrad $24, %xmm1
544
+ ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
559
545
; SSE-NEXT: pxor %xmm3, %xmm3
560
546
; SSE-NEXT: pcmpgtb %xmm3, %xmm2
561
- ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
547
+ ; SSE-NEXT: movdqa %xmm2, %xmm3
562
548
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
563
- ; SSE-NEXT: psrad $24, %xmm3
564
549
; SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
565
- ; SSE-NEXT: psrad $24, %xmm2
566
550
; SSE-NEXT: movdqa %xmm2, 16(%rsi)
567
551
; SSE-NEXT: movdqa %xmm3, (%rsi)
568
552
; SSE-NEXT: retq
0 commit comments