@@ -892,10 +892,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
892
892
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
893
893
; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
894
894
; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
895
- ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
896
- ; AVX512F-NEXT: kshiftlw $8, %k1, %k1
897
- ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
898
- ; AVX512F-NEXT: korw %k1, %k0, %k1
895
+ ; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
899
896
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
900
897
; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
901
898
; AVX512F-NEXT: vzeroupper
@@ -905,8 +902,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
905
902
; AVX512VL: # %bb.0: # %entry
906
903
; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
907
904
; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
908
- ; AVX512VL-NEXT: kshiftlw $8, %k0, %k0
909
- ; AVX512VL-NEXT: korw %k1, %k0, %k1
905
+ ; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
910
906
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
911
907
; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
912
908
; AVX512VL-NEXT: vzeroupper
@@ -916,8 +912,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
916
912
; VL_BW_DQ: # %bb.0: # %entry
917
913
; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
918
914
; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
919
- ; VL_BW_DQ-NEXT: kshiftlw $8, %k0, %k0
920
- ; VL_BW_DQ-NEXT: korw %k1, %k0, %k1
915
+ ; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1
921
916
; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
922
917
; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
923
918
; VL_BW_DQ-NEXT: vzeroupper
@@ -945,10 +940,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
945
940
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
946
941
; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
947
942
; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
948
- ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
949
- ; AVX512F-NEXT: kshiftlw $8, %k1, %k1
950
- ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
951
- ; AVX512F-NEXT: korw %k0, %k1, %k1
943
+ ; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
952
944
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
953
945
; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
954
946
; AVX512F-NEXT: vzeroupper
@@ -958,8 +950,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
958
950
; AVX512VL: # %bb.0: # %entry
959
951
; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
960
952
; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
961
- ; AVX512VL-NEXT: kshiftlw $8, %k0, %k0
962
- ; AVX512VL-NEXT: korw %k0, %k1, %k1
953
+ ; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
963
954
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
964
955
; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
965
956
; AVX512VL-NEXT: vzeroupper
@@ -969,8 +960,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
969
960
; VL_BW_DQ: # %bb.0: # %entry
970
961
; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
971
962
; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
972
- ; VL_BW_DQ-NEXT: kshiftlw $8, %k0, %k0
973
- ; VL_BW_DQ-NEXT: korw %k0, %k1, %k1
963
+ ; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1
974
964
; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
975
965
; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
976
966
; VL_BW_DQ-NEXT: vzeroupper
0 commit comments