@@ -937,6 +937,65 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
937
937
ret <16 x i32 > %d
938
938
}
939
939
940
; PR109790 — regression test: (trunc (lshr (mul (zext nneg (and x, 0x7FFF)), -1000), 16))
; should select PMULHW/VPMULHW on SSE/AVX2 (the masked operand is provably
; non-negative, so a 16-bit high-half multiply is valid); AVX512 currently
; still widens to 32-bit — NOTE(review): AVX512 CHECK lines reflect the
; current (unoptimized) codegen, not the desired one.
define void @PR109790(ptr sret([32 x i8]) %ret, ptr %a) {
; SSE-LABEL: PR109790:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE-NEXT:    movdqa (%rsi), %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pand 16(%rsi), %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536]
; SSE-NEXT:    pmulhw %xmm2, %xmm0
; SSE-NEXT:    pmulhw %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, (%rdi)
; SSE-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: PR109790:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    vmovdqa (%rsi), %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
; AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: PR109790:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrld $16, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: PR109790:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movq %rdi, %rax
; AVX512BW-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512BW-NEXT:    vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0]
; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  ; Mask every lane to [0, 32767] so the zext is provably non-negative.
  %load = load <16 x i16>, ptr %a, align 32
  %and = and <16 x i16> %load, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  %ext = zext nneg <16 x i16> %and to <16 x i32>
  ; -1000 == 64536 as an unsigned 16-bit immediate in the CHECK lines above.
  %mul = mul nsw <16 x i32> %ext, <i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000>
  ; High 16 bits of the 32-bit product == pmulhw result per lane.
  %srl = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %res = trunc nuw <16 x i32> %srl to <16 x i16>
  store <16 x i16> %res, ptr %ret, align 32
  ret void
}

940
999
; PR109790
941
1000
define <16 x i16 > @zext_mulhuw_v16i16_negative_constant (<16 x i16 > %a ) {
942
1001
; SSE-LABEL: zext_mulhuw_v16i16_negative_constant:
0 commit comments