Skip to content

Commit d18bee2

Browse files
committed
[X86] combineConcatVectorOps - concatenate FADD/FSUB/FMUL ops if we don't increase the number of INSERT_SUBVECTOR nodes.
FADD/FSUB/FMUL are usually less port-bound than INSERT_SUBVECTOR, so only concatenate if it reduces the instruction count and doesn't introduce extra INSERT_SUBVECTOR nodes.
1 parent e933c05 commit d18bee2

File tree

4 files changed

+127
-64
lines changed

4 files changed

+127
-64
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -55529,7 +55529,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5552955529
}
5553055530
break;
5553155531
// Because VADD, VSUB and VMUL can execute on more ports than VINSERT and
55532-
// their latencies are short, we don't replace them here.
55532+
// their latencies are short, we don't replace them here unless doing so
55533+
// won't introduce an extra VINSERT.
55534+
case ISD::FADD:
55535+
case ISD::FSUB:
55536+
case ISD::FMUL:
55537+
if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
55538+
(VT.is256BitVector() ||
55539+
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
55540+
return DAG.getNode(Op0.getOpcode(), DL, VT,
55541+
ConcatSubOperand(VT, Ops, 0),
55542+
ConcatSubOperand(VT, Ops, 1));
55543+
}
55544+
break;
5553355545
case ISD::FDIV:
5553455546
if (!IsSplat && (VT.is256BitVector() ||
5553555547
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {

llvm/test/CodeGen/X86/widen_fadd.ll

+38-21
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,35 @@ define <8 x float> @widen_fadd_v4f32_v8f32_const(<4 x float> %x, <4 x float> %y)
373373
; SSE-NEXT: addps %xmm2, %xmm1
374374
; SSE-NEXT: retq
375375
;
376-
; AVX-LABEL: widen_fadd_v4f32_v8f32_const:
377-
; AVX: # %bb.0:
378-
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
379-
; AVX-NEXT: vaddps %xmm2, %xmm0, %xmm0
380-
; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1
381-
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
382-
; AVX-NEXT: retq
376+
; AVX1-LABEL: widen_fadd_v4f32_v8f32_const:
377+
; AVX1: # %bb.0:
378+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
379+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
380+
; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
381+
; AVX1-NEXT: retq
382+
;
383+
; AVX2-LABEL: widen_fadd_v4f32_v8f32_const:
384+
; AVX2: # %bb.0:
385+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
386+
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
387+
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
388+
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
389+
; AVX2-NEXT: retq
390+
;
391+
; AVX512F-LABEL: widen_fadd_v4f32_v8f32_const:
392+
; AVX512F: # %bb.0:
393+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
394+
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
395+
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
396+
; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
397+
; AVX512F-NEXT: retq
398+
;
399+
; AVX512VL-LABEL: widen_fadd_v4f32_v8f32_const:
400+
; AVX512VL: # %bb.0:
401+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
402+
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
403+
; AVX512VL-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
404+
; AVX512VL-NEXT: retq
383405
%x2 = fadd <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
384406
%y2 = fadd <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
385407
%r = shufflevector <4 x float> %x2, <4 x float> %y2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -398,25 +420,23 @@ define <16 x float> @widen_fadd_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
398420
;
399421
; AVX1OR2-LABEL: widen_fadd_v4f32_v16f32_const:
400422
; AVX1OR2: # %bb.0:
401-
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
402-
; AVX1OR2-NEXT: vaddps %xmm4, %xmm0, %xmm0
403-
; AVX1OR2-NEXT: vaddps %xmm4, %xmm1, %xmm1
404-
; AVX1OR2-NEXT: vaddps %xmm4, %xmm2, %xmm2
405-
; AVX1OR2-NEXT: vaddps %xmm4, %xmm3, %xmm3
423+
; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
424+
; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
406425
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
407-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
426+
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
427+
; AVX1OR2-NEXT: vaddps %ymm1, %ymm0, %ymm0
428+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
429+
; AVX1OR2-NEXT: vaddps %ymm1, %ymm2, %ymm1
408430
; AVX1OR2-NEXT: retq
409431
;
410432
; AVX512-LABEL: widen_fadd_v4f32_v16f32_const:
411433
; AVX512: # %bb.0:
412-
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
413-
; AVX512-NEXT: vaddps %xmm4, %xmm0, %xmm0
414-
; AVX512-NEXT: vaddps %xmm4, %xmm1, %xmm1
415-
; AVX512-NEXT: vaddps %xmm4, %xmm2, %xmm2
416-
; AVX512-NEXT: vaddps %xmm4, %xmm3, %xmm3
434+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
435+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
417436
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
418437
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
419438
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
439+
; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
420440
; AVX512-NEXT: retq
421441
%x2 = fadd <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
422442
%y2 = fadd <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
@@ -427,6 +447,3 @@ define <16 x float> @widen_fadd_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
427447
%r = shufflevector <8 x float> %r0, <8 x float> %r1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
428448
ret <16 x float> %r
429449
}
430-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
431-
; AVX1: {{.*}}
432-
; AVX2: {{.*}}

llvm/test/CodeGen/X86/widen_fmul.ll

+38-21
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,35 @@ define <8 x float> @widen_fmul_v4f32_v8f32_const(<4 x float> %x, <4 x float> %y)
373373
; SSE-NEXT: mulps %xmm2, %xmm1
374374
; SSE-NEXT: retq
375375
;
376-
; AVX-LABEL: widen_fmul_v4f32_v8f32_const:
377-
; AVX: # %bb.0:
378-
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
379-
; AVX-NEXT: vmulps %xmm2, %xmm0, %xmm0
380-
; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm1
381-
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
382-
; AVX-NEXT: retq
376+
; AVX1-LABEL: widen_fmul_v4f32_v8f32_const:
377+
; AVX1: # %bb.0:
378+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
379+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
380+
; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
381+
; AVX1-NEXT: retq
382+
;
383+
; AVX2-LABEL: widen_fmul_v4f32_v8f32_const:
384+
; AVX2: # %bb.0:
385+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
386+
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
387+
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
388+
; AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
389+
; AVX2-NEXT: retq
390+
;
391+
; AVX512F-LABEL: widen_fmul_v4f32_v8f32_const:
392+
; AVX512F: # %bb.0:
393+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
394+
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
395+
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
396+
; AVX512F-NEXT: vmulps %ymm1, %ymm0, %ymm0
397+
; AVX512F-NEXT: retq
398+
;
399+
; AVX512VL-LABEL: widen_fmul_v4f32_v8f32_const:
400+
; AVX512VL: # %bb.0:
401+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
402+
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
403+
; AVX512VL-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
404+
; AVX512VL-NEXT: retq
383405
%x2 = fmul <4 x float> %x, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
384406
%y2 = fmul <4 x float> %y, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
385407
%r = shufflevector <4 x float> %x2, <4 x float> %y2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -398,25 +420,23 @@ define <16 x float> @widen_fmul_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
398420
;
399421
; AVX1OR2-LABEL: widen_fmul_v4f32_v16f32_const:
400422
; AVX1OR2: # %bb.0:
401-
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
402-
; AVX1OR2-NEXT: vmulps %xmm4, %xmm0, %xmm0
403-
; AVX1OR2-NEXT: vmulps %xmm4, %xmm1, %xmm1
404-
; AVX1OR2-NEXT: vmulps %xmm4, %xmm2, %xmm2
405-
; AVX1OR2-NEXT: vmulps %xmm4, %xmm3, %xmm3
423+
; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
424+
; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
406425
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
407-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
426+
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
427+
; AVX1OR2-NEXT: vmulps %ymm1, %ymm0, %ymm0
428+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
429+
; AVX1OR2-NEXT: vmulps %ymm1, %ymm2, %ymm1
408430
; AVX1OR2-NEXT: retq
409431
;
410432
; AVX512-LABEL: widen_fmul_v4f32_v16f32_const:
411433
; AVX512: # %bb.0:
412-
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
413-
; AVX512-NEXT: vmulps %xmm4, %xmm0, %xmm0
414-
; AVX512-NEXT: vmulps %xmm4, %xmm1, %xmm1
415-
; AVX512-NEXT: vmulps %xmm4, %xmm2, %xmm2
416-
; AVX512-NEXT: vmulps %xmm4, %xmm3, %xmm3
434+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
435+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
417436
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
418437
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
419438
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
439+
; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
420440
; AVX512-NEXT: retq
421441
%x2 = fmul <4 x float> %x, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
422442
%y2 = fmul <4 x float> %y, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
@@ -427,6 +447,3 @@ define <16 x float> @widen_fmul_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
427447
%r = shufflevector <8 x float> %r0, <8 x float> %r1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
428448
ret <16 x float> %r
429449
}
430-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
431-
; AVX1: {{.*}}
432-
; AVX2: {{.*}}

llvm/test/CodeGen/X86/widen_fsub.ll

+38-21
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,35 @@ define <8 x float> @widen_fsub_v4f32_v8f32_const(<4 x float> %x, <4 x float> %y)
373373
; SSE-NEXT: subps %xmm2, %xmm1
374374
; SSE-NEXT: retq
375375
;
376-
; AVX-LABEL: widen_fsub_v4f32_v8f32_const:
377-
; AVX: # %bb.0:
378-
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
379-
; AVX-NEXT: vsubps %xmm2, %xmm0, %xmm0
380-
; AVX-NEXT: vsubps %xmm2, %xmm1, %xmm1
381-
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
382-
; AVX-NEXT: retq
376+
; AVX1-LABEL: widen_fsub_v4f32_v8f32_const:
377+
; AVX1: # %bb.0:
378+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
379+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
380+
; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
381+
; AVX1-NEXT: retq
382+
;
383+
; AVX2-LABEL: widen_fsub_v4f32_v8f32_const:
384+
; AVX2: # %bb.0:
385+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
386+
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
387+
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
388+
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
389+
; AVX2-NEXT: retq
390+
;
391+
; AVX512F-LABEL: widen_fsub_v4f32_v8f32_const:
392+
; AVX512F: # %bb.0:
393+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
394+
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
395+
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
396+
; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
397+
; AVX512F-NEXT: retq
398+
;
399+
; AVX512VL-LABEL: widen_fsub_v4f32_v8f32_const:
400+
; AVX512VL: # %bb.0:
401+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
402+
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
403+
; AVX512VL-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
404+
; AVX512VL-NEXT: retq
383405
%x2 = fsub <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
384406
%y2 = fsub <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
385407
%r = shufflevector <4 x float> %x2, <4 x float> %y2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -398,25 +420,23 @@ define <16 x float> @widen_fsub_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
398420
;
399421
; AVX1OR2-LABEL: widen_fsub_v4f32_v16f32_const:
400422
; AVX1OR2: # %bb.0:
401-
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
402-
; AVX1OR2-NEXT: vsubps %xmm4, %xmm0, %xmm0
403-
; AVX1OR2-NEXT: vsubps %xmm4, %xmm1, %xmm1
404-
; AVX1OR2-NEXT: vsubps %xmm4, %xmm2, %xmm2
405-
; AVX1OR2-NEXT: vsubps %xmm4, %xmm3, %xmm3
423+
; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
424+
; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
406425
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
407-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
426+
; AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
427+
; AVX1OR2-NEXT: vaddps %ymm1, %ymm0, %ymm0
428+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
429+
; AVX1OR2-NEXT: vaddps %ymm1, %ymm2, %ymm1
408430
; AVX1OR2-NEXT: retq
409431
;
410432
; AVX512-LABEL: widen_fsub_v4f32_v16f32_const:
411433
; AVX512: # %bb.0:
412-
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
413-
; AVX512-NEXT: vsubps %xmm4, %xmm0, %xmm0
414-
; AVX512-NEXT: vsubps %xmm4, %xmm1, %xmm1
415-
; AVX512-NEXT: vsubps %xmm4, %xmm2, %xmm2
416-
; AVX512-NEXT: vsubps %xmm4, %xmm3, %xmm3
434+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
435+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
417436
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
418437
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
419438
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
439+
; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
420440
; AVX512-NEXT: retq
421441
%x2 = fsub <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
422442
%y2 = fsub <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
@@ -427,6 +447,3 @@ define <16 x float> @widen_fsub_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
427447
%r = shufflevector <8 x float> %r0, <8 x float> %r1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
428448
ret <16 x float> %r
429449
}
430-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
431-
; AVX1: {{.*}}
432-
; AVX2: {{.*}}

0 commit comments

Comments
 (0)