@@ -876,20 +876,73 @@ define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
876
876
ret <8 x i32 > %out
877
877
}
878
878
879
- ; TODO: This should be either a single vslideup.vi or two widening interleaves.
880
- define <8 x i8 > @shuffle_spread4_singlesrc_e8 (<8 x i8 > %v ) {
881
- ; CHECK-LABEL: shuffle_spread4_singlesrc_e8:
879
+ define <16 x i8 > @shuffle_spread4_singlesrc_e8_idx0 (<16 x i8 > %v ) {
880
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
882
881
; CHECK: # %bb.0:
883
- ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
882
+ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
884
883
; CHECK-NEXT: vid.v v9
885
884
; CHECK-NEXT: vsrl.vi v10, v9, 2
886
885
; CHECK-NEXT: vrgather.vv v9, v8, v10
887
- ; CHECK-NEXT: vmv1r.v v8, v9
886
+ ; CHECK-NEXT: vmv.v.v v8, v9
888
887
; CHECK-NEXT: ret
889
- %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef >
890
- ret <8 x i8 > %out
888
+ %out = shufflevector <16 x i8 > %v , <16 x i8 > poison, <16 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 3 , i32 undef , i32 undef , i32 undef >
889
+ ret <16 x i8 > %out
890
+ }
891
+
892
+ define <16 x i8 > @shuffle_spread4_singlesrc_e8_idx1 (<16 x i8 > %v ) { ; Single-source shuffle test: elements 0..3 of %v are placed at result lanes 1, 5, 9 and 13.
893
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
894
+ ; CHECK: # %bb.0:
895
+ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
896
+ ; CHECK-NEXT: vid.v v9
897
+ ; CHECK-NEXT: vsrl.vi v10, v9, 2
898
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
899
+ ; CHECK-NEXT: vmv.v.v v8, v9
900
+ ; CHECK-NEXT: ret
901
+ %out = shufflevector <16 x i8 > %v , <16 x i8 > poison, <16 x i32 > <i32 undef , i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 3 , i32 undef , i32 undef > ; mask lane 4*k+1 selects element k (k = 0..3); every other lane is undef. The second operand is poison and never selected.
902
+ ret <16 x i8 > %out
891
903
}
892
904
905
+ define <16 x i8 > @shuffle_spread4_singlesrc_e8_idx2 (<16 x i8 > %v ) { ; Single-source shuffle test: elements 0..3 of %v are placed at result lanes 2, 6, 10 and 14.
906
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
907
+ ; CHECK: # %bb.0:
908
+ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
909
+ ; CHECK-NEXT: vid.v v9
910
+ ; CHECK-NEXT: vsrl.vi v10, v9, 2
911
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
912
+ ; CHECK-NEXT: vmv.v.v v8, v9
913
+ ; CHECK-NEXT: ret
914
+ %out = shufflevector <16 x i8 > %v , <16 x i8 > poison, <16 x i32 > <i32 undef , i32 undef , i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 3 , i32 undef > ; mask lane 4*k+2 selects element k (k = 0..3); every other lane is undef. The second operand is poison and never selected.
915
+ ret <16 x i8 > %out
916
+ }
917
+
918
+ define <16 x i8 > @shuffle_spread4_singlesrc_e8_idx3 (<16 x i8 > %v ) { ; Single-source shuffle test: elements 0..3 of %v are placed at result lanes 3, 7, 11 and 15.
919
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
920
+ ; CHECK: # %bb.0:
921
+ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
922
+ ; CHECK-NEXT: vid.v v9
923
+ ; CHECK-NEXT: vsrl.vi v10, v9, 2
924
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
925
+ ; CHECK-NEXT: vmv.v.v v8, v9
926
+ ; CHECK-NEXT: ret
927
+ %out = shufflevector <16 x i8 > %v , <16 x i8 > poison, <16 x i32 > <i32 undef , i32 undef , i32 undef , i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 3 > ; mask lane 4*k+3 selects element k (k = 0..3); every other lane is undef. The second operand is poison and never selected.
928
+ ret <16 x i8 > %out
929
+ }
930
+
931
+ define <16 x i8 > @shuffle_spread4_singlesrc_e8_idx4 (<16 x i8 > %v ) { ; Single-source shuffle test: elements 0..2 of %v are placed at result lanes 4, 8 and 12; the first four lanes are all undef.
932
+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx4:
933
+ ; CHECK: # %bb.0:
934
+ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
935
+ ; CHECK-NEXT: vid.v v9
936
+ ; CHECK-NEXT: vsrl.vi v9, v9, 2
937
+ ; CHECK-NEXT: vadd.vi v10, v9, -1
938
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
939
+ ; CHECK-NEXT: vmv.v.v v8, v9
940
+ ; CHECK-NEXT: ret
941
+ %out = shufflevector <16 x i8 > %v , <16 x i8 > poison, <16 x i32 > <i32 undef , i32 undef , i32 undef , i32 undef , i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef > ; mask lane 4*(k+1) selects element k (k = 0..2); every other lane is undef. The expected code above carries an extra vadd.vi -1 on the gather indices.
942
+ ret <16 x i8 > %out
943
+ }
944
+
945
+
893
946
define <32 x i8 > @shuffle_spread8_singlesrc_e8 (<32 x i8 > %v ) {
894
947
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
895
948
; CHECK: # %bb.0:
@@ -907,8 +960,8 @@ define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
907
960
define <8 x i32 > @shuffle_decompress_singlesrc_e32 (<8 x i32 > %v ) {
908
961
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
909
962
; CHECK: # %bb.0:
910
- ; CHECK-NEXT: lui a0, %hi(.LCPI61_0)
911
- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0)
963
+ ; CHECK-NEXT: lui a0, %hi(.LCPI65_0)
964
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0)
912
965
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
913
966
; CHECK-NEXT: vle16.v v12, (a0)
914
967
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
@@ -918,6 +971,22 @@ define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
918
971
ret <8 x i32 > %out
919
972
}
920
973
974
+ ; TODO: This should be a single vslideup.vi
975
+ define <8 x i8 > @shuffle_decompress_singlesrc_e8 (<8 x i8 > %v ) { ; Single-source shuffle test: result lane 0 takes element 0 of %v and lanes 4..7 take elements 1..4.
976
+ ; CHECK-LABEL: shuffle_decompress_singlesrc_e8:
977
+ ; CHECK: # %bb.0:
978
+ ; CHECK-NEXT: lui a0, %hi(.LCPI66_0)
979
+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI66_0)
980
+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
981
+ ; CHECK-NEXT: vle8.v v10, (a0)
982
+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
983
+ ; CHECK-NEXT: vmv1r.v v8, v9
984
+ ; CHECK-NEXT: ret
985
+ %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 2 , i32 3 , i32 4 > ; lanes 1..3 are undef in the mask. The expected code above loads the gather indices from constant-pool entry .LCPI66_0.
986
+ ret <8 x i8 > %out
987
+ }
988
+
989
+
921
990
define <8 x i32 > @shuffle_repeat2_singlesrc_e32 (<8 x i32 > %v ) {
922
991
; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
923
992
; CHECK: # %bb.0:
0 commit comments