Skip to content

Commit f187948

Browse files
committed
[X86][FP16] Enable vector support for FP16 emulation
This is a follow-up to D107082; it enables vector support for FP16 emulation according to the psABI. Reviewed By: skan Differential Revision: https://reviews.llvm.org/D127982
1 parent e97b2d4 commit f187948

24 files changed

+2180
-3014
lines changed

Diff for: llvm/lib/Target/X86/X86ISelLowering.cpp

+269-167
Large diffs are not rendered by default.

Diff for: llvm/lib/Target/X86/X86InstrAVX512.td

+24-24
Original file line numberDiff line numberDiff line change
@@ -3769,12 +3769,16 @@ let Predicates = [HasAVX512] in {
37693769
(VMOVDQA64Zrm addr:$src)>;
37703770
def : Pat<(alignedloadv32i16 addr:$src),
37713771
(VMOVDQA64Zrm addr:$src)>;
3772+
def : Pat<(alignedloadv32f16 addr:$src),
3773+
(VMOVAPSZrm addr:$src)>;
37723774
def : Pat<(alignedloadv64i8 addr:$src),
37733775
(VMOVDQA64Zrm addr:$src)>;
37743776
def : Pat<(loadv16i32 addr:$src),
37753777
(VMOVDQU64Zrm addr:$src)>;
37763778
def : Pat<(loadv32i16 addr:$src),
37773779
(VMOVDQU64Zrm addr:$src)>;
3780+
def : Pat<(loadv32f16 addr:$src),
3781+
(VMOVUPSZrm addr:$src)>;
37783782
def : Pat<(loadv64i8 addr:$src),
37793783
(VMOVDQU64Zrm addr:$src)>;
37803784

@@ -3783,12 +3787,16 @@ let Predicates = [HasAVX512] in {
37833787
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
37843788
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
37853789
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3790+
def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3791+
(VMOVAPSZmr addr:$dst, VR512:$src)>;
37863792
def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
37873793
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
37883794
def : Pat<(store (v16i32 VR512:$src), addr:$dst),
37893795
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
37903796
def : Pat<(store (v32i16 VR512:$src), addr:$dst),
37913797
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3798+
def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3799+
(VMOVUPSZmr addr:$dst, VR512:$src)>;
37923800
def : Pat<(store (v64i8 VR512:$src), addr:$dst),
37933801
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
37943802
}
@@ -3799,12 +3807,16 @@ let Predicates = [HasVLX] in {
37993807
(VMOVDQA64Z128rm addr:$src)>;
38003808
def : Pat<(alignedloadv8i16 addr:$src),
38013809
(VMOVDQA64Z128rm addr:$src)>;
3810+
def : Pat<(alignedloadv8f16 addr:$src),
3811+
(VMOVAPSZ128rm addr:$src)>;
38023812
def : Pat<(alignedloadv16i8 addr:$src),
38033813
(VMOVDQA64Z128rm addr:$src)>;
38043814
def : Pat<(loadv4i32 addr:$src),
38053815
(VMOVDQU64Z128rm addr:$src)>;
38063816
def : Pat<(loadv8i16 addr:$src),
38073817
(VMOVDQU64Z128rm addr:$src)>;
3818+
def : Pat<(loadv8f16 addr:$src),
3819+
(VMOVUPSZ128rm addr:$src)>;
38083820
def : Pat<(loadv16i8 addr:$src),
38093821
(VMOVDQU64Z128rm addr:$src)>;
38103822

@@ -3813,12 +3825,16 @@ let Predicates = [HasVLX] in {
38133825
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
38143826
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
38153827
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3828+
def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3829+
(VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
38163830
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
38173831
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
38183832
def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
38193833
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
38203834
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
38213835
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3836+
def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3837+
(VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
38223838
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
38233839
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
38243840

@@ -3827,12 +3843,16 @@ let Predicates = [HasVLX] in {
38273843
(VMOVDQA64Z256rm addr:$src)>;
38283844
def : Pat<(alignedloadv16i16 addr:$src),
38293845
(VMOVDQA64Z256rm addr:$src)>;
3846+
def : Pat<(alignedloadv16f16 addr:$src),
3847+
(VMOVAPSZ256rm addr:$src)>;
38303848
def : Pat<(alignedloadv32i8 addr:$src),
38313849
(VMOVDQA64Z256rm addr:$src)>;
38323850
def : Pat<(loadv8i32 addr:$src),
38333851
(VMOVDQU64Z256rm addr:$src)>;
38343852
def : Pat<(loadv16i16 addr:$src),
38353853
(VMOVDQU64Z256rm addr:$src)>;
3854+
def : Pat<(loadv16f16 addr:$src),
3855+
(VMOVUPSZ256rm addr:$src)>;
38363856
def : Pat<(loadv32i8 addr:$src),
38373857
(VMOVDQU64Z256rm addr:$src)>;
38383858

@@ -3841,12 +3861,16 @@ let Predicates = [HasVLX] in {
38413861
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
38423862
def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
38433863
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3864+
def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3865+
(VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
38443866
def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
38453867
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
38463868
def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
38473869
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
38483870
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
38493871
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3872+
def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3873+
(VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
38503874
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
38513875
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
38523876
}
@@ -3855,16 +3879,12 @@ let Predicates = [HasBWI] in {
38553879
(VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
38563880
def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
38573881
(VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3858-
def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
3859-
(VMOVAPSZrm addr:$src)>;
38603882
def : Pat<(v32f16 (vselect VK32WM:$mask,
38613883
(v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
38623884
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
38633885
def : Pat<(v32f16 (vselect VK32WM:$mask,
38643886
(v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
38653887
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3866-
def : Pat<(v32f16 (loadv32f16 addr:$src)),
3867-
(VMOVUPSZrm addr:$src)>;
38683888
def : Pat<(v32f16 (vselect VK32WM:$mask,
38693889
(v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
38703890
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
@@ -3878,10 +3898,6 @@ let Predicates = [HasBWI] in {
38783898
def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
38793899
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
38803900

3881-
def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3882-
(VMOVAPSZmr addr:$dst, VR512:$src)>;
3883-
def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3884-
(VMOVUPSZmr addr:$dst, VR512:$src)>;
38853901
def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
38863902
(VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
38873903
}
@@ -3890,16 +3906,12 @@ let Predicates = [HasBWI, HasVLX] in {
38903906
(VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
38913907
def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
38923908
(VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3893-
def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
3894-
(VMOVAPSZ256rm addr:$src)>;
38953909
def : Pat<(v16f16 (vselect VK16WM:$mask,
38963910
(v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
38973911
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
38983912
def : Pat<(v16f16 (vselect VK16WM:$mask,
38993913
(v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
39003914
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3901-
def : Pat<(v16f16 (loadv16f16 addr:$src)),
3902-
(VMOVUPSZ256rm addr:$src)>;
39033915
def : Pat<(v16f16 (vselect VK16WM:$mask,
39043916
(v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
39053917
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
@@ -3913,27 +3925,19 @@ let Predicates = [HasBWI, HasVLX] in {
39133925
def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
39143926
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
39153927

3916-
def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3917-
(VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3918-
def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3919-
(VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
39203928
def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
39213929
(VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
39223930

39233931
def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
39243932
(VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
39253933
def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
39263934
(VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3927-
def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
3928-
(VMOVAPSZ128rm addr:$src)>;
39293935
def : Pat<(v8f16 (vselect VK8WM:$mask,
39303936
(v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
39313937
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
39323938
def : Pat<(v8f16 (vselect VK8WM:$mask,
39333939
(v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
39343940
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3935-
def : Pat<(v8f16 (loadv8f16 addr:$src)),
3936-
(VMOVUPSZ128rm addr:$src)>;
39373941
def : Pat<(v8f16 (vselect VK8WM:$mask,
39383942
(v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
39393943
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
@@ -3947,10 +3951,6 @@ let Predicates = [HasBWI, HasVLX] in {
39473951
def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
39483952
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
39493953

3950-
def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3951-
(VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3952-
def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3953-
(VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
39543954
def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
39553955
(VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
39563956
}

0 commit comments

Comments
 (0)