
Commit 14bcd8b

[X86] Add tests for enabling slow-pmulld on AVX2 targets
As discussed on D110588, Haswell/Broadwell don't have a great PMULLD implementation, so we may want to enable slow-pmulld for them in the future.
1 parent d023298 commit 14bcd8b
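
For context, only CHECK lines are added in this diff; the function bodies are not shown. Judging from the hunk headers and the repeated constant 18778, each test multiplies a zero-extended narrow vector by a 16-bit-safe constant, which is what makes the PMADDWD/PMULLW expansions legal. A minimal sketch of what one such function likely looks like (the body below is an assumption reconstructed from the CHECK lines, not copied from the file):

; Hypothetical body, inferred from the CHECK lines in this diff: zero-extend
; the narrow input and multiply by the splatted constant 18778.
define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
  %z = zext <4 x i16> %A to <4 x i32>
  %m = mul <4 x i32> %z, <i32 18778, i32 18778, i32 18778, i32 18778>
  ret <4 x i32> %m
}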

1 file changed: +133 −0
llvm/test/CodeGen/X86/slow-pmulld.ll

Lines changed: 133 additions & 0 deletions
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+slow-pmulld | FileCheck %s --check-prefixes=CHECK64,SLOW64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4-32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4-64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx2,+slow-pmulld | FileCheck %s --check-prefixes=AVX2-SLOW,AVX2-SLOW32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+slow-pmulld | FileCheck %s --check-prefixes=AVX2-SLOW,AVX2-SLOW64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX-32,AVX2-32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX-64,AVX2-64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX-32,AVX512-32,AVX512DQ-32
@@ -42,6 +44,18 @@ define <4 x i32> @test_mul_v4i32_v4i8(<4 x i8> %A) {
4244
; SSE4-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4345
; SSE4-64-NEXT: retq
4446
;
47+
; AVX2-SLOW32-LABEL: test_mul_v4i32_v4i8:
48+
; AVX2-SLOW32: # %bb.0:
49+
; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
50+
; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
51+
; AVX2-SLOW32-NEXT: retl
52+
;
53+
; AVX2-SLOW64-LABEL: test_mul_v4i32_v4i8:
54+
; AVX2-SLOW64: # %bb.0:
55+
; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
56+
; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
57+
; AVX2-SLOW64-NEXT: retq
58+
;
4559
; AVX2-32-LABEL: test_mul_v4i32_v4i8:
4660
; AVX2-32: # %bb.0:
4761
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
@@ -165,6 +179,18 @@ define <8 x i32> @test_mul_v8i32_v8i8(<8 x i8> %A) {
 ; SSE4-64-NEXT: pmaddwd %xmm2, %xmm1
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW32-LABEL: test_mul_v8i32_v8i8:
+; AVX2-SLOW32: # %bb.0:
+; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX2-SLOW32-NEXT: retl
+;
+; AVX2-SLOW64-LABEL: test_mul_v8i32_v8i8:
+; AVX2-SLOW64: # %bb.0:
+; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-SLOW64-NEXT: retq
+;
 ; AVX2-32-LABEL: test_mul_v8i32_v8i8:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
@@ -336,6 +362,22 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
 ; SSE4-64-NEXT: pmaddwd %xmm4, %xmm3
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v16i32_v16i8:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulhw %ymm1, %ymm0, %ymm2
+; AVX2-SLOW-NEXT: vpmullw %ymm1, %ymm0, %ymm1
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX2-32-LABEL: test_mul_v16i32_v16i8:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
@@ -427,6 +469,14 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
 ; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
+; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
+; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX-32-LABEL: test_mul_v4i32_v4i16:
 ; AVX-32: # %bb.0:
 ; AVX-32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
@@ -516,6 +566,16 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
 ; SSE4-64-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v8i32_v8i16:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
+; AVX2-SLOW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX-32-LABEL: test_mul_v8i32_v8i16:
 ; AVX-32: # %bb.0:
 ; AVX-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -643,6 +703,21 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
 ; SSE4-64-NEXT: movdqa %xmm4, %xmm1
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v16i32_v16i16:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
+; AVX2-SLOW-NEXT: vpmullw %ymm1, %ymm0, %ymm1
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX2-32-LABEL: test_mul_v16i32_v16i16:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -708,6 +783,18 @@ define <4 x i32> @test_mul_v4i32_v4i8_minsize(<4 x i8> %A) minsize {
 ; SSE4-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW32-LABEL: test_mul_v4i32_v4i8_minsize:
+; AVX2-SLOW32: # %bb.0:
+; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX2-SLOW32-NEXT: retl
+;
+; AVX2-SLOW64-LABEL: test_mul_v4i32_v4i8_minsize:
+; AVX2-SLOW64: # %bb.0:
+; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW64-NEXT: retq
+;
 ; AVX2-32-LABEL: test_mul_v4i32_v4i8_minsize:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
@@ -823,6 +910,18 @@ define <8 x i32> @test_mul_v8i32_v8i8_minsize(<8 x i8> %A) minsize {
 ; SSE4-64-NEXT: pmaddwd %xmm2, %xmm1
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW32-LABEL: test_mul_v8i32_v8i8_minsize:
+; AVX2-SLOW32: # %bb.0:
+; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX2-SLOW32-NEXT: retl
+;
+; AVX2-SLOW64-LABEL: test_mul_v8i32_v8i8_minsize:
+; AVX2-SLOW64: # %bb.0:
+; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-SLOW64-NEXT: retq
+;
 ; AVX2-32-LABEL: test_mul_v8i32_v8i8_minsize:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
@@ -974,6 +1073,16 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
 ; SSE4-64-NEXT: pmaddwd %xmm4, %xmm3
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v16i32_v16i8_minsize:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX2-32-LABEL: test_mul_v16i32_v16i8_minsize:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
@@ -1059,6 +1168,13 @@ define <4 x i32> @test_mul_v4i32_v4i16_minsize(<4 x i16> %A) minsize {
 ; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16_minsize:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX-32-LABEL: test_mul_v4i32_v4i16_minsize:
 ; AVX-32: # %bb.0:
 ; AVX-32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
@@ -1122,6 +1238,13 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
 ; SSE4-64-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v8i32_v8i16_minsize:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX-32-LABEL: test_mul_v8i32_v8i16_minsize:
 ; AVX-32: # %bb.0:
 ; AVX-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -1237,6 +1360,16 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
 ; SSE4-64-NEXT: movdqa %xmm4, %xmm1
 ; SSE4-64-NEXT: retq
 ;
+; AVX2-SLOW-LABEL: test_mul_v16i32_v16i16_minsize:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpmulld %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpmulld %ymm2, %ymm1, %ymm1
+; AVX2-SLOW-NEXT: ret{{[l|q]}}
+;
 ; AVX2-32-LABEL: test_mul_v16i32_v16i16_minsize:
 ; AVX2-32: # %bb.0:
 ; AVX2-32-NEXT: vextracti128 $1, %ymm0, %xmm1
