@@ -373,13 +373,35 @@ define <8 x float> @widen_fsub_v4f32_v8f32_const(<4 x float> %x, <4 x float> %y)
373
373
; SSE-NEXT: subps %xmm2, %xmm1
374
374
; SSE-NEXT: retq
375
375
;
376
- ; AVX-LABEL: widen_fsub_v4f32_v8f32_const:
377
- ; AVX: # %bb.0:
378
- ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
379
- ; AVX-NEXT: vsubps %xmm2, %xmm0, %xmm0
380
- ; AVX-NEXT: vsubps %xmm2, %xmm1, %xmm1
381
- ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
382
- ; AVX-NEXT: retq
376
+ ; AVX1-LABEL: widen_fsub_v4f32_v8f32_const:
377
+ ; AVX1: # %bb.0:
378
+ ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
379
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
380
+ ; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
381
+ ; AVX1-NEXT: retq
382
+ ;
383
+ ; AVX2-LABEL: widen_fsub_v4f32_v8f32_const:
384
+ ; AVX2: # %bb.0:
385
+ ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
386
+ ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
387
+ ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
388
+ ; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
389
+ ; AVX2-NEXT: retq
390
+ ;
391
+ ; AVX512F-LABEL: widen_fsub_v4f32_v8f32_const:
392
+ ; AVX512F: # %bb.0:
393
+ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
394
+ ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
395
+ ; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
396
+ ; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
397
+ ; AVX512F-NEXT: retq
398
+ ;
399
+ ; AVX512VL-LABEL: widen_fsub_v4f32_v8f32_const:
400
+ ; AVX512VL: # %bb.0:
401
+ ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
402
+ ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
403
+ ; AVX512VL-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
404
+ ; AVX512VL-NEXT: retq
383
405
%x2 = fsub <4 x float > %x , <float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 >
384
406
%y2 = fsub <4 x float > %y , <float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 >
385
407
%r = shufflevector <4 x float > %x2 , <4 x float > %y2 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
@@ -398,25 +420,23 @@ define <16 x float> @widen_fsub_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
398
420
;
399
421
; AVX1OR2-LABEL: widen_fsub_v4f32_v16f32_const:
400
422
; AVX1OR2: # %bb.0:
401
- ; AVX1OR2-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
402
- ; AVX1OR2-NEXT: vsubps %xmm4, %xmm0, %xmm0
403
- ; AVX1OR2-NEXT: vsubps %xmm4, %xmm1, %xmm1
404
- ; AVX1OR2-NEXT: vsubps %xmm4, %xmm2, %xmm2
405
- ; AVX1OR2-NEXT: vsubps %xmm4, %xmm3, %xmm3
423
+ ; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
424
+ ; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
406
425
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
407
- ; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
426
+ ; AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0,-2.0E+0]
427
+ ; AVX1OR2-NEXT: vaddps %ymm1, %ymm0, %ymm0
428
+ ; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
429
+ ; AVX1OR2-NEXT: vaddps %ymm1, %ymm2, %ymm1
408
430
; AVX1OR2-NEXT: retq
409
431
;
410
432
; AVX512-LABEL: widen_fsub_v4f32_v16f32_const:
411
433
; AVX512: # %bb.0:
412
- ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
413
- ; AVX512-NEXT: vsubps %xmm4, %xmm0, %xmm0
414
- ; AVX512-NEXT: vsubps %xmm4, %xmm1, %xmm1
415
- ; AVX512-NEXT: vsubps %xmm4, %xmm2, %xmm2
416
- ; AVX512-NEXT: vsubps %xmm4, %xmm3, %xmm3
434
+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
435
+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
417
436
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
418
437
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
419
438
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
439
+ ; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
420
440
; AVX512-NEXT: retq
421
441
%x2 = fsub <4 x float > %x , <float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 >
422
442
%y2 = fsub <4 x float > %y , <float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 , float 2 .000000e+00 >
@@ -427,6 +447,3 @@ define <16 x float> @widen_fsub_v4f32_v16f32_const(<4 x float> %x, <4 x float> %
427
447
%r = shufflevector <8 x float > %r0 , <8 x float > %r1 , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
428
448
ret <16 x float > %r
429
449
}
430
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
431
- ; AVX1: {{.*}}
432
- ; AVX2: {{.*}}
0 commit comments