|
3 | 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSSE3
|
4 | 4 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
|
5 | 5 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
6 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW |
7 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL |
8 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE |
| 6 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 |
| 7 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 |
| 8 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 |
9 | 9 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
10 | 10 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
11 | 11 |
|
@@ -2846,60 +2846,22 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
|
2846 | 2846 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
2847 | 2847 | ; AVX1-NEXT: retq
|
2848 | 2848 | ;
|
2849 |
| -; AVX2-SLOW-LABEL: test33: |
2850 |
| -; AVX2-SLOW: # %bb.0: |
2851 |
| -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2852 |
| -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
2853 |
| -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2854 |
| -; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2855 |
| -; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2856 |
| -; AVX2-SLOW-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
2857 |
| -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
2858 |
| -; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2859 |
| -; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
2860 |
| -; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
2861 |
| -; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2862 |
| -; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
2863 |
| -; AVX2-SLOW-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2864 |
| -; AVX2-SLOW-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2865 |
| -; AVX2-SLOW-NEXT: retq |
2866 |
| -; |
2867 |
| -; AVX2-FAST-ALL-LABEL: test33: |
2868 |
| -; AVX2-FAST-ALL: # %bb.0: |
2869 |
| -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2870 |
| -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4 |
2871 |
| -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2872 |
| -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2873 |
| -; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2874 |
| -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm6, %ymm1 |
2875 |
| -; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7] |
2876 |
| -; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm4, %ymm1 |
2877 |
| -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm2, %ymm3 |
2878 |
| -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2879 |
| -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm2, %ymm6, %ymm2 |
2880 |
| -; AVX2-FAST-ALL-NEXT: vpermps %ymm2, %ymm4, %ymm2 |
2881 |
| -; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2882 |
| -; AVX2-FAST-ALL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2883 |
| -; AVX2-FAST-ALL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2884 |
| -; AVX2-FAST-ALL-NEXT: retq |
2885 |
| -; |
2886 |
| -; AVX2-FAST-PERLANE-LABEL: test33: |
2887 |
| -; AVX2-FAST-PERLANE: # %bb.0: |
2888 |
| -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2889 |
| -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
2890 |
| -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2891 |
| -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2892 |
| -; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2893 |
| -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
2894 |
| -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
2895 |
| -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2896 |
| -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
2897 |
| -; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
2898 |
| -; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2899 |
| -; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
2900 |
| -; AVX2-FAST-PERLANE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2901 |
| -; AVX2-FAST-PERLANE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2902 |
| -; AVX2-FAST-PERLANE-NEXT: retq |
| 2849 | +; AVX2-LABEL: test33: |
| 2850 | +; AVX2: # %bb.0: |
| 2851 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
| 2852 | +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
| 2853 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
| 2854 | +; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
| 2855 | +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
| 2856 | +; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
| 2857 | +; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
| 2858 | +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
| 2859 | +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
| 2860 | +; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm2[0,2],ymm1[4,6],ymm2[4,6] |
| 2861 | +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3] |
| 2862 | +; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
| 2863 | +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
| 2864 | +; AVX2-NEXT: retq |
2903 | 2865 | ;
|
2904 | 2866 | ; AVX512-LABEL: test33:
|
2905 | 2867 | ; AVX512: # %bb.0:
|
@@ -3070,66 +3032,24 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) {
|
3070 | 3032 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
3071 | 3033 | ; AVX1-NEXT: retq
|
3072 | 3034 | ;
|
3073 |
| -; AVX2-SLOW-LABEL: test34: |
3074 |
| -; AVX2-SLOW: # %bb.0: |
3075 |
| -; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3076 |
| -; AVX2-SLOW-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3077 |
| -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3078 |
| -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
3079 |
| -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3080 |
| -; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3081 |
| -; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3082 |
| -; AVX2-SLOW-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
3083 |
| -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
3084 |
| -; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3085 |
| -; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
3086 |
| -; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
3087 |
| -; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3088 |
| -; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
3089 |
| -; AVX2-SLOW-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3090 |
| -; AVX2-SLOW-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3091 |
| -; AVX2-SLOW-NEXT: retq |
3092 |
| -; |
3093 |
| -; AVX2-FAST-ALL-LABEL: test34: |
3094 |
| -; AVX2-FAST-ALL: # %bb.0: |
3095 |
| -; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3096 |
| -; AVX2-FAST-ALL-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3097 |
| -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3098 |
| -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4 |
3099 |
| -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3100 |
| -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3101 |
| -; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3102 |
| -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm6, %ymm1 |
3103 |
| -; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7] |
3104 |
| -; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm4, %ymm1 |
3105 |
| -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm2, %ymm3 |
3106 |
| -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3107 |
| -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm2, %ymm6, %ymm2 |
3108 |
| -; AVX2-FAST-ALL-NEXT: vpermps %ymm2, %ymm4, %ymm2 |
3109 |
| -; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3110 |
| -; AVX2-FAST-ALL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3111 |
| -; AVX2-FAST-ALL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3112 |
| -; AVX2-FAST-ALL-NEXT: retq |
3113 |
| -; |
3114 |
| -; AVX2-FAST-PERLANE-LABEL: test34: |
3115 |
| -; AVX2-FAST-PERLANE: # %bb.0: |
3116 |
| -; AVX2-FAST-PERLANE-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3117 |
| -; AVX2-FAST-PERLANE-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3118 |
| -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3119 |
| -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
3120 |
| -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3121 |
| -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3122 |
| -; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3123 |
| -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
3124 |
| -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
3125 |
| -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3126 |
| -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
3127 |
| -; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
3128 |
| -; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3129 |
| -; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
3130 |
| -; AVX2-FAST-PERLANE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3131 |
| -; AVX2-FAST-PERLANE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3132 |
| -; AVX2-FAST-PERLANE-NEXT: retq |
| 3035 | +; AVX2-LABEL: test34: |
| 3036 | +; AVX2: # %bb.0: |
| 3037 | +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
| 3038 | +; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| 3039 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
| 3040 | +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
| 3041 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
| 3042 | +; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
| 3043 | +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
| 3044 | +; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
| 3045 | +; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
| 3046 | +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
| 3047 | +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
| 3048 | +; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm2[0,2],ymm1[4,6],ymm2[4,6] |
| 3049 | +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3] |
| 3050 | +; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
| 3051 | +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
| 3052 | +; AVX2-NEXT: retq |
3133 | 3053 | ;
|
3134 | 3054 | ; AVX512-LABEL: test34:
|
3135 | 3055 | ; AVX512: # %bb.0:
|
|
0 commit comments