@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
 
 define void @widen_fadd_v2f32_v4f32(ptr %a0, ptr %b0, ptr %c0) {
 ; SSE-LABEL: widen_fadd_v2f32_v4f32:
@@ -364,3 +364,69 @@ define void @widen_fadd_v2f32_v16f32(ptr %a0, ptr %b0, ptr %c0) {
   store <2 x float> %vc14, ptr %c14, align 4
   ret void
 }
+
+define <8 x float> @widen_fadd_v4f32_v8f32_const(<4 x float> %x, <4 x float> %y) {
+; SSE-LABEL: widen_fadd_v4f32_v8f32_const:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm2 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; SSE-NEXT:    addps %xmm2, %xmm0
+; SSE-NEXT:    addps %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: widen_fadd_v4f32_v8f32_const:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; AVX-NEXT:    vaddps %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %x2 = fadd <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %y2 = fadd <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %r = shufflevector <4 x float> %x2, <4 x float> %y2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %r
+}
+
+define <16 x float> @widen_fadd_v4f32_v16f32_const(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
+; SSE-LABEL: widen_fadd_v4f32_v16f32_const:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm4, %xmm1
+; SSE-NEXT:    addps %xmm4, %xmm2
+; SSE-NEXT:    addps %xmm4, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: widen_fadd_v4f32_v16f32_const:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; AVX1OR2-NEXT:    vaddps %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vaddps %xmm4, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vaddps %xmm4, %xmm2, %xmm2
+; AVX1OR2-NEXT:    vaddps %xmm4, %xmm3, %xmm3
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: widen_fadd_v4f32_v16f32_const:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; AVX512-NEXT:    vaddps %xmm4, %xmm0, %xmm0
+; AVX512-NEXT:    vaddps %xmm4, %xmm1, %xmm1
+; AVX512-NEXT:    vaddps %xmm4, %xmm2, %xmm2
+; AVX512-NEXT:    vaddps %xmm4, %xmm3, %xmm3
+; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %x2 = fadd <4 x float> %x, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %y2 = fadd <4 x float> %y, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %z2 = fadd <4 x float> %z, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %w2 = fadd <4 x float> %w, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %r0 = shufflevector <4 x float> %x2, <4 x float> %y2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %r1 = shufflevector <4 x float> %z2, <4 x float> %w2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %r = shufflevector <8 x float> %r0, <8 x float> %r1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1: {{.*}}
+; AVX2: {{.*}}
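
For reference, a minimal sketch of a widened form equivalent to @widen_fadd_v4f32_v8f32_const above (hypothetical, not part of the patch; the function name is illustrative). If a shuffle(fadd(x, C), fadd(y, C)) -> fadd(concat(x, y), C') fold fired, the two <4 x float> adds would merge into a single <8 x float> add:

; Hypothetical widened equivalent of @widen_fadd_v4f32_v8f32_const (not in the patch).
define <8 x float> @widen_fadd_v4f32_v8f32_const_widened(<4 x float> %x, <4 x float> %y) {
  ; Concatenate the two 128-bit inputs into one <8 x float> value.
  %xy = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; One wide fadd against the splatted constant replaces the two narrow fadds.
  %r = fadd <8 x float> %xy, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  ret <8 x float> %r
}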