Skip to content

Commit 67c3f2b

Browse files
authored
[X86] mayFoldIntoStore - peek through oneuse bitcast users to find a store node (llvm#123366)
mayFoldIntoStore currently just checks the direct (oneuse) user of a SDValue to see whether it is stored, which misses cases where we bitcast the value prior to storing (usually the bitcast will be removed later). This patch peeks up through a chain of oneuse BITCAST nodes to see if the value is eventually stored. The main use of mayFoldIntoStore is v8i16 EXTRACT_VECTOR_ELT lowering, which will only use PEXTRW/PEXTRB for index0 extractions (vs the faster MOVD) if the extracted value will be folded into a store on SSE41+ targets. Fixes llvm#107086
1 parent c3a935e commit 67c3f2b

File tree

11 files changed

+50
-79
lines changed

11 files changed

+50
-79
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2812,7 +2812,16 @@ bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
28122812
}
28132813

28142814
bool X86::mayFoldIntoStore(SDValue Op) {
2815-
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->user_begin());
2815+
if (!Op.hasOneUse())
2816+
return false;
2817+
// Peek through (oneuse) bitcast users
2818+
SDNode *User = *Op->user_begin();
2819+
while (User->getOpcode() == ISD::BITCAST) {
2820+
if (!User->hasOneUse())
2821+
return false;
2822+
User = *User->user_begin();
2823+
}
2824+
return ISD::isNormalStore(User);
28162825
}
28172826

28182827
bool X86::mayFoldIntoZeroExtend(SDValue Op) {

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
5353
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
5454
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5555
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
56-
; AVX512-NEXT: vmovd %xmm0, %eax
57-
; AVX512-NEXT: movw %ax, (%rdi)
56+
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
5857
; AVX512-NEXT: retq
5958
entry:
6059
%val = load half, half addrspace(1)* %out

llvm/test/CodeGen/X86/cvt16.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ define void @test1(float %src, ptr %dest) nounwind {
3434
; F16C-LABEL: test1:
3535
; F16C: # %bb.0:
3636
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
37-
; F16C-NEXT: vmovd %xmm0, %eax
38-
; F16C-NEXT: movw %ax, (%rdi)
37+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
3938
; F16C-NEXT: retq
4039
;
4140
; SOFTFLOAT-LABEL: test1:

llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,7 @@ define void @fptrunc_float_to_f16(ptr %val, ptr%ret) nounwind strictfp {
316316
; AVX: # %bb.0:
317317
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
318318
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
319-
; AVX-NEXT: vmovd %xmm0, %eax
320-
; AVX-NEXT: movw %ax, (%rsi)
319+
; AVX-NEXT: vpextrw $0, %xmm0, (%rsi)
321320
; AVX-NEXT: retq
322321
;
323322
; X86-LABEL: fptrunc_float_to_f16:
@@ -411,8 +410,7 @@ define void @fsqrt_f16(ptr %a) nounwind strictfp {
411410
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
412411
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
413412
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
414-
; AVX-NEXT: vmovd %xmm0, %eax
415-
; AVX-NEXT: movw %ax, (%rdi)
413+
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
416414
; AVX-NEXT: retq
417415
;
418416
; X86-LABEL: fsqrt_f16:

llvm/test/CodeGen/X86/fp16-libcalls.ll

Lines changed: 20 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind {
1212
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1313
; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
1414
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
15-
; F16C-NEXT: vmovd %xmm0, %eax
16-
; F16C-NEXT: movw %ax, (%rdi)
15+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
1716
; F16C-NEXT: retq
1817
;
1918
; FP16-LABEL: test_half_ceil:
@@ -108,8 +107,7 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind {
108107
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
109108
; F16C-NEXT: callq cosf@PLT
110109
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
111-
; F16C-NEXT: vmovd %xmm0, %eax
112-
; F16C-NEXT: movw %ax, (%rbx)
110+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
113111
; F16C-NEXT: popq %rbx
114112
; F16C-NEXT: retq
115113
;
@@ -167,8 +165,7 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind {
167165
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
168166
; F16C-NEXT: callq expf@PLT
169167
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
170-
; F16C-NEXT: vmovd %xmm0, %eax
171-
; F16C-NEXT: movw %ax, (%rbx)
168+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
172169
; F16C-NEXT: popq %rbx
173170
; F16C-NEXT: retq
174171
;
@@ -226,8 +223,7 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind {
226223
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
227224
; F16C-NEXT: callq exp2f@PLT
228225
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
229-
; F16C-NEXT: vmovd %xmm0, %eax
230-
; F16C-NEXT: movw %ax, (%rbx)
226+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
231227
; F16C-NEXT: popq %rbx
232228
; F16C-NEXT: retq
233229
;
@@ -285,8 +281,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
285281
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
286282
; F16C-NEXT: callq exp10f@PLT
287283
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
288-
; F16C-NEXT: vmovd %xmm0, %eax
289-
; F16C-NEXT: movw %ax, (%rbx)
284+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
290285
; F16C-NEXT: popq %rbx
291286
; F16C-NEXT: retq
292287
;
@@ -342,8 +337,7 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
342337
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
343338
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
344339
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
345-
; F16C-NEXT: vmovd %xmm0, %eax
346-
; F16C-NEXT: movw %ax, (%rdi)
340+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
347341
; F16C-NEXT: retq
348342
;
349343
; FP16-LABEL: test_half_fabs:
@@ -383,8 +377,7 @@ define void @test_half_floor(half %a0, ptr %p0) nounwind {
383377
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
384378
; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
385379
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
386-
; F16C-NEXT: vmovd %xmm0, %eax
387-
; F16C-NEXT: movw %ax, (%rdi)
380+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
388381
; F16C-NEXT: retq
389382
;
390383
; FP16-LABEL: test_half_floor:
@@ -438,8 +431,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
438431
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
439432
; F16C-NEXT: callq fmaf@PLT
440433
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
441-
; F16C-NEXT: vmovd %xmm0, %eax
442-
; F16C-NEXT: movw %ax, (%rbx)
434+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
443435
; F16C-NEXT: popq %rbx
444436
; F16C-NEXT: retq
445437
;
@@ -525,8 +517,7 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
525517
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
526518
; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
527519
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
528-
; F16C-NEXT: vmovd %xmm0, %eax
529-
; F16C-NEXT: movw %ax, (%rdi)
520+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
530521
; F16C-NEXT: retq
531522
;
532523
; FP16-LABEL: test_half_fneg:
@@ -568,8 +559,7 @@ define void @test_half_log(half %a0, ptr %p0) nounwind {
568559
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
569560
; F16C-NEXT: callq logf@PLT
570561
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
571-
; F16C-NEXT: vmovd %xmm0, %eax
572-
; F16C-NEXT: movw %ax, (%rbx)
562+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
573563
; F16C-NEXT: popq %rbx
574564
; F16C-NEXT: retq
575565
;
@@ -627,8 +617,7 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind {
627617
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
628618
; F16C-NEXT: callq log2f@PLT
629619
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
630-
; F16C-NEXT: vmovd %xmm0, %eax
631-
; F16C-NEXT: movw %ax, (%rbx)
620+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
632621
; F16C-NEXT: popq %rbx
633622
; F16C-NEXT: retq
634623
;
@@ -686,8 +675,7 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
686675
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
687676
; F16C-NEXT: callq log10f@PLT
688677
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
689-
; F16C-NEXT: vmovd %xmm0, %eax
690-
; F16C-NEXT: movw %ax, (%rbx)
678+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
691679
; F16C-NEXT: popq %rbx
692680
; F16C-NEXT: retq
693681
;
@@ -743,8 +731,7 @@ define void @test_half_nearbyint(half %a0, ptr %p0) nounwind {
743731
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
744732
; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
745733
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
746-
; F16C-NEXT: vmovd %xmm0, %eax
747-
; F16C-NEXT: movw %ax, (%rdi)
734+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
748735
; F16C-NEXT: retq
749736
;
750737
; FP16-LABEL: test_half_nearbyint:
@@ -797,8 +784,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
797784
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
798785
; F16C-NEXT: callq powf@PLT
799786
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
800-
; F16C-NEXT: vmovd %xmm0, %eax
801-
; F16C-NEXT: movw %ax, (%rbx)
787+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
802788
; F16C-NEXT: popq %rbx
803789
; F16C-NEXT: retq
804790
;
@@ -876,8 +862,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
876862
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
877863
; F16C-NEXT: callq __powisf2@PLT
878864
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
879-
; F16C-NEXT: vmovd %xmm0, %eax
880-
; F16C-NEXT: movw %ax, (%rbx)
865+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
881866
; F16C-NEXT: popq %rbx
882867
; F16C-NEXT: retq
883868
;
@@ -943,8 +928,7 @@ define void @test_half_rint(half %a0, ptr %p0) nounwind {
943928
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
944929
; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
945930
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
946-
; F16C-NEXT: vmovd %xmm0, %eax
947-
; F16C-NEXT: movw %ax, (%rdi)
931+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
948932
; F16C-NEXT: retq
949933
;
950934
; FP16-LABEL: test_half_rint:
@@ -996,8 +980,7 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
996980
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
997981
; F16C-NEXT: callq sinf@PLT
998982
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
999-
; F16C-NEXT: vmovd %xmm0, %eax
1000-
; F16C-NEXT: movw %ax, (%rbx)
983+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
1001984
; F16C-NEXT: popq %rbx
1002985
; F16C-NEXT: retq
1003986
;
@@ -1053,8 +1036,7 @@ define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
10531036
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
10541037
; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
10551038
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1056-
; F16C-NEXT: vmovd %xmm0, %eax
1057-
; F16C-NEXT: movw %ax, (%rdi)
1039+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
10581040
; F16C-NEXT: retq
10591041
;
10601042
; FP16-LABEL: test_half_sqrt:
@@ -1107,8 +1089,7 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
11071089
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
11081090
; F16C-NEXT: callq tanf@PLT
11091091
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1110-
; F16C-NEXT: vmovd %xmm0, %eax
1111-
; F16C-NEXT: movw %ax, (%rbx)
1092+
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
11121093
; F16C-NEXT: popq %rbx
11131094
; F16C-NEXT: retq
11141095
;
@@ -1164,8 +1145,7 @@ define void @test_half_trunc(half %a0, ptr %p0) nounwind {
11641145
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
11651146
; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
11661147
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1167-
; F16C-NEXT: vmovd %xmm0, %eax
1168-
; F16C-NEXT: movw %ax, (%rdi)
1148+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
11691149
; F16C-NEXT: retq
11701150
;
11711151
; FP16-LABEL: test_half_trunc:

llvm/test/CodeGen/X86/half-constrained.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ define void @float_to_half(float %0) strictfp {
176176
; X86-F16C: # %bb.0:
177177
; X86-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
178178
; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
179-
; X86-F16C-NEXT: vmovd %xmm0, %eax
180-
; X86-F16C-NEXT: movw %ax, a
179+
; X86-F16C-NEXT: vpextrw $0, %xmm0, a
181180
; X86-F16C-NEXT: retl
182181
;
183182
; X64-NOF16C-LABEL: float_to_half:
@@ -197,9 +196,8 @@ define void @float_to_half(float %0) strictfp {
197196
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
198197
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
199198
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
200-
; X64-F16C-NEXT: vmovd %xmm0, %eax
201-
; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rcx
202-
; X64-F16C-NEXT: movw %ax, (%rcx)
199+
; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rax
200+
; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
203201
; X64-F16C-NEXT: retq
204202
%2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
205203
store half %2, ptr @a, align 2
@@ -354,8 +352,7 @@ define void @add() strictfp {
354352
; X86-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
355353
; X86-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
356354
; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
357-
; X86-F16C-NEXT: vmovd %xmm0, %eax
358-
; X86-F16C-NEXT: movw %ax, c
355+
; X86-F16C-NEXT: vpextrw $0, %xmm0, c
359356
; X86-F16C-NEXT: retl
360357
;
361358
; X64-NOF16C-LABEL: add:
@@ -392,9 +389,8 @@ define void @add() strictfp {
392389
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
393390
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
394391
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
395-
; X64-F16C-NEXT: vmovd %xmm0, %eax
396-
; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rcx
397-
; X64-F16C-NEXT: movw %ax, (%rcx)
392+
; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rax
393+
; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
398394
; X64-F16C-NEXT: retq
399395
%1 = load half, ptr @a, align 2
400396
%2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0

llvm/test/CodeGen/X86/half-darwin.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ define void @truncsfhf(float %in, ptr %ptr) nounwind {
1616
; CHECK-F16C-LABEL: truncsfhf:
1717
; CHECK-F16C: ## %bb.0:
1818
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
19-
; CHECK-F16C-NEXT: vmovd %xmm0, %eax
20-
; CHECK-F16C-NEXT: movw %ax, (%rdi)
19+
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
2120
; CHECK-F16C-NEXT: retq
2221
;
2322
; CHECK-FP16-LABEL: truncsfhf:
@@ -108,8 +107,7 @@ define void @strict_truncsfhf(float %in, ptr %ptr) nounwind strictfp {
108107
; CHECK-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
109108
; CHECK-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
110109
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
111-
; CHECK-F16C-NEXT: vmovd %xmm0, %eax
112-
; CHECK-F16C-NEXT: movw %ax, (%rdi)
110+
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
113111
; CHECK-F16C-NEXT: retq
114112
;
115113
; CHECK-FP16-LABEL: strict_truncsfhf:

llvm/test/CodeGen/X86/half.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,7 @@ define void @test_trunc32(float %in, ptr %addr) #0 {
146146
; BWON-F16C-LABEL: test_trunc32:
147147
; BWON-F16C: # %bb.0:
148148
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
149-
; BWON-F16C-NEXT: vmovd %xmm0, %eax
150-
; BWON-F16C-NEXT: movw %ax, (%rdi)
149+
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
151150
; BWON-F16C-NEXT: retq
152151
;
153152
; CHECK-I686-LABEL: test_trunc32:
@@ -265,8 +264,7 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
265264
; BWON-F16C: # %bb.0:
266265
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
267266
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
268-
; BWON-F16C-NEXT: vmovd %xmm0, %eax
269-
; BWON-F16C-NEXT: movw %ax, (%rsi)
267+
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
270268
; BWON-F16C-NEXT: retq
271269
;
272270
; CHECK-I686-LABEL: test_sitofp_i64:
@@ -398,8 +396,7 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
398396
; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
399397
; BWON-F16C-NEXT: .LBB10_3:
400398
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
401-
; BWON-F16C-NEXT: vmovd %xmm0, %eax
402-
; BWON-F16C-NEXT: movw %ax, (%rsi)
399+
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
403400
; BWON-F16C-NEXT: retq
404401
;
405402
; CHECK-I686-LABEL: test_uitofp_i64:
@@ -1075,8 +1072,7 @@ define void @main.158() #0 {
10751072
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
10761073
; BWON-F16C-NEXT: .LBB20_2: # %entry
10771074
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1078-
; BWON-F16C-NEXT: vmovd %xmm0, %eax
1079-
; BWON-F16C-NEXT: movw %ax, (%rax)
1075+
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
10801076
; BWON-F16C-NEXT: retq
10811077
;
10821078
; CHECK-I686-LABEL: main.158:

llvm/test/CodeGen/X86/pr91005.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ define void @PR91005(ptr %0) minsize {
1616
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1717
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
1818
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
19-
; CHECK-NEXT: vmovd %xmm0, %eax
20-
; CHECK-NEXT: movw %ax, (%rdi)
19+
; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
2120
; CHECK-NEXT: .LBB0_2: # %common.ret
2221
; CHECK-NEXT: retq
2322
%2 = bitcast <2 x half> poison to <2 x i16>

llvm/test/CodeGen/X86/pr95278.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ define void @PR95278(ptr %p0, ptr %p1) {
88
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
99
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
1010
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
11-
; CHECK-NEXT: vmovd %xmm0, %eax
12-
; CHECK-NEXT: movw %ax, (%rsi)
11+
; CHECK-NEXT: vpextrw $0, %xmm0, (%rsi)
1312
; CHECK-NEXT: vzeroupper
1413
; CHECK-NEXT: retq
1514
%load = load <1024 x half>, ptr %p0, align 2

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,15 +2596,13 @@ define void @store_cvt_f32_to_i16(float %a0, ptr %a1) nounwind {
25962596
; F16C-LABEL: store_cvt_f32_to_i16:
25972597
; F16C: # %bb.0:
25982598
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
2599-
; F16C-NEXT: vmovd %xmm0, %eax
2600-
; F16C-NEXT: movw %ax, (%rdi)
2599+
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
26012600
; F16C-NEXT: retq
26022601
;
26032602
; AVX512-LABEL: store_cvt_f32_to_i16:
26042603
; AVX512: # %bb.0:
26052604
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
2606-
; AVX512-NEXT: vmovd %xmm0, %eax
2607-
; AVX512-NEXT: movw %ax, (%rdi)
2605+
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
26082606
; AVX512-NEXT: retq
26092607
%1 = fptrunc float %a0 to half
26102608
%2 = bitcast half %1 to i16

0 commit comments

Comments
 (0)