@@ -3769,12 +3769,16 @@ let Predicates = [HasAVX512] in {
3769
3769
(VMOVDQA64Zrm addr:$src)>;
3770
3770
def : Pat<(alignedloadv32i16 addr:$src),
3771
3771
(VMOVDQA64Zrm addr:$src)>;
3772
+ def : Pat<(alignedloadv32f16 addr:$src),
3773
+ (VMOVAPSZrm addr:$src)>;
3772
3774
def : Pat<(alignedloadv64i8 addr:$src),
3773
3775
(VMOVDQA64Zrm addr:$src)>;
3774
3776
def : Pat<(loadv16i32 addr:$src),
3775
3777
(VMOVDQU64Zrm addr:$src)>;
3776
3778
def : Pat<(loadv32i16 addr:$src),
3777
3779
(VMOVDQU64Zrm addr:$src)>;
3780
+ def : Pat<(loadv32f16 addr:$src),
3781
+ (VMOVUPSZrm addr:$src)>;
3778
3782
def : Pat<(loadv64i8 addr:$src),
3779
3783
(VMOVDQU64Zrm addr:$src)>;
3780
3784
@@ -3783,12 +3787,16 @@ let Predicates = [HasAVX512] in {
3783
3787
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3784
3788
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3785
3789
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3790
+ def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3791
+ (VMOVAPSZmr addr:$dst, VR512:$src)>;
3786
3792
def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3787
3793
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3788
3794
def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3789
3795
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3790
3796
def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3791
3797
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3798
+ def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3799
+ (VMOVUPSZmr addr:$dst, VR512:$src)>;
3792
3800
def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3793
3801
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3794
3802
}
@@ -3799,12 +3807,16 @@ let Predicates = [HasVLX] in {
3799
3807
(VMOVDQA64Z128rm addr:$src)>;
3800
3808
def : Pat<(alignedloadv8i16 addr:$src),
3801
3809
(VMOVDQA64Z128rm addr:$src)>;
3810
+ def : Pat<(alignedloadv8f16 addr:$src),
3811
+ (VMOVAPSZ128rm addr:$src)>;
3802
3812
def : Pat<(alignedloadv16i8 addr:$src),
3803
3813
(VMOVDQA64Z128rm addr:$src)>;
3804
3814
def : Pat<(loadv4i32 addr:$src),
3805
3815
(VMOVDQU64Z128rm addr:$src)>;
3806
3816
def : Pat<(loadv8i16 addr:$src),
3807
3817
(VMOVDQU64Z128rm addr:$src)>;
3818
+ def : Pat<(loadv8f16 addr:$src),
3819
+ (VMOVUPSZ128rm addr:$src)>;
3808
3820
def : Pat<(loadv16i8 addr:$src),
3809
3821
(VMOVDQU64Z128rm addr:$src)>;
3810
3822
@@ -3813,12 +3825,16 @@ let Predicates = [HasVLX] in {
3813
3825
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3814
3826
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3815
3827
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3828
+ def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3829
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3816
3830
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3817
3831
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3818
3832
def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3819
3833
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3820
3834
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3821
3835
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3836
+ def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3837
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3822
3838
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3823
3839
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3824
3840
@@ -3827,12 +3843,16 @@ let Predicates = [HasVLX] in {
3827
3843
(VMOVDQA64Z256rm addr:$src)>;
3828
3844
def : Pat<(alignedloadv16i16 addr:$src),
3829
3845
(VMOVDQA64Z256rm addr:$src)>;
3846
+ def : Pat<(alignedloadv16f16 addr:$src),
3847
+ (VMOVAPSZ256rm addr:$src)>;
3830
3848
def : Pat<(alignedloadv32i8 addr:$src),
3831
3849
(VMOVDQA64Z256rm addr:$src)>;
3832
3850
def : Pat<(loadv8i32 addr:$src),
3833
3851
(VMOVDQU64Z256rm addr:$src)>;
3834
3852
def : Pat<(loadv16i16 addr:$src),
3835
3853
(VMOVDQU64Z256rm addr:$src)>;
3854
+ def : Pat<(loadv16f16 addr:$src),
3855
+ (VMOVUPSZ256rm addr:$src)>;
3836
3856
def : Pat<(loadv32i8 addr:$src),
3837
3857
(VMOVDQU64Z256rm addr:$src)>;
3838
3858
@@ -3841,12 +3861,16 @@ let Predicates = [HasVLX] in {
3841
3861
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3842
3862
def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3843
3863
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3864
+ def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3865
+ (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3844
3866
def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3845
3867
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3846
3868
def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3847
3869
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3848
3870
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3849
3871
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3872
+ def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3873
+ (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3850
3874
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3851
3875
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3852
3876
}
@@ -3855,16 +3879,12 @@ let Predicates = [HasBWI] in {
3855
3879
(VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3856
3880
def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
3857
3881
(VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3858
- def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
3859
- (VMOVAPSZrm addr:$src)>;
3860
3882
def : Pat<(v32f16 (vselect VK32WM:$mask,
3861
3883
(v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3862
3884
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3863
3885
def : Pat<(v32f16 (vselect VK32WM:$mask,
3864
3886
(v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3865
3887
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3866
- def : Pat<(v32f16 (loadv32f16 addr:$src)),
3867
- (VMOVUPSZrm addr:$src)>;
3868
3888
def : Pat<(v32f16 (vselect VK32WM:$mask,
3869
3889
(v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3870
3890
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
@@ -3878,10 +3898,6 @@ let Predicates = [HasBWI] in {
3878
3898
def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
3879
3899
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3880
3900
3881
- def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3882
- (VMOVAPSZmr addr:$dst, VR512:$src)>;
3883
- def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3884
- (VMOVUPSZmr addr:$dst, VR512:$src)>;
3885
3901
def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
3886
3902
(VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3887
3903
}
@@ -3890,16 +3906,12 @@ let Predicates = [HasBWI, HasVLX] in {
3890
3906
(VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3891
3907
def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
3892
3908
(VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3893
- def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
3894
- (VMOVAPSZ256rm addr:$src)>;
3895
3909
def : Pat<(v16f16 (vselect VK16WM:$mask,
3896
3910
(v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3897
3911
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3898
3912
def : Pat<(v16f16 (vselect VK16WM:$mask,
3899
3913
(v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3900
3914
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3901
- def : Pat<(v16f16 (loadv16f16 addr:$src)),
3902
- (VMOVUPSZ256rm addr:$src)>;
3903
3915
def : Pat<(v16f16 (vselect VK16WM:$mask,
3904
3916
(v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3905
3917
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
@@ -3913,27 +3925,19 @@ let Predicates = [HasBWI, HasVLX] in {
3913
3925
def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
3914
3926
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3915
3927
3916
- def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3917
- (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3918
- def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3919
- (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3920
3928
def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
3921
3929
(VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3922
3930
3923
3931
def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
3924
3932
(VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3925
3933
def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
3926
3934
(VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3927
- def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
3928
- (VMOVAPSZ128rm addr:$src)>;
3929
3935
def : Pat<(v8f16 (vselect VK8WM:$mask,
3930
3936
(v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3931
3937
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3932
3938
def : Pat<(v8f16 (vselect VK8WM:$mask,
3933
3939
(v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3934
3940
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3935
- def : Pat<(v8f16 (loadv8f16 addr:$src)),
3936
- (VMOVUPSZ128rm addr:$src)>;
3937
3941
def : Pat<(v8f16 (vselect VK8WM:$mask,
3938
3942
(v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3939
3943
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
@@ -3947,10 +3951,6 @@ let Predicates = [HasBWI, HasVLX] in {
3947
3951
def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
3948
3952
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3949
3953
3950
- def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3951
- (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3952
- def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3953
- (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3954
3954
def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
3955
3955
(VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3956
3956
}
0 commit comments