@@ -648,3 +648,178 @@ loop:
648
648
exit:
649
649
ret void
650
650
}
651
+
652
; uitofp from <8 x i16> to <8 x double> inside a fixed-trip-count loop.
;
; Each iteration loads one <8 x i16> from %x, converts it with uitofp and
; stores the <8 x double> result to %y. %index advances by 8 and the loop exits
; when it reaches 1024, i.e. after 128 iterations. %n is not used by the body.
;
; Both pointers advance 64 bytes per iteration: the source offset is
; %index * 8 bytes and the destination offset is %index doubles. The expected
; code therefore uses a single offset register (x8) for both addresses.
;
; Expected lowering: four index vectors are loaded from lCPI10_0..lCPI10_3
; ahead of the loop, and inside the loop the loaded vector goes through four
; tbl.16b lookups whose results feed ucvtf.2d directly, with no separate
; extend instructions.
;
; NOTE(review): the assertions below look like update_llc_test_checks.py
; output. Regenerate them instead of hand-editing (confirm against the RUN
; lines and header at the top of the file).
define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
; CHECK-LABEL: uitofp_v8i16_to_v8f64:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:  Lloh22:
; CHECK-NEXT:    adrp x8, lCPI10_0@PAGE
; CHECK-NEXT:  Lloh23:
; CHECK-NEXT:    adrp x9, lCPI10_1@PAGE
; CHECK-NEXT:  Lloh24:
; CHECK-NEXT:    adrp x10, lCPI10_2@PAGE
; CHECK-NEXT:  Lloh25:
; CHECK-NEXT:    ldr q0, [x8, lCPI10_0@PAGEOFF]
; CHECK-NEXT:  Lloh26:
; CHECK-NEXT:    adrp x8, lCPI10_3@PAGE
; CHECK-NEXT:  Lloh27:
; CHECK-NEXT:    ldr q1, [x9, lCPI10_1@PAGEOFF]
; CHECK-NEXT:  Lloh28:
; CHECK-NEXT:    ldr q2, [x10, lCPI10_2@PAGEOFF]
; CHECK-NEXT:  Lloh29:
; CHECK-NEXT:    ldr q3, [x8, lCPI10_3@PAGEOFF]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  LBB10_1: ; %vector.body
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr q4, [x0, x8]
; CHECK-NEXT:    add x9, x1, x8
; CHECK-NEXT:    add x8, x8, #64
; CHECK-NEXT:    cmp x8, #2, lsl #12 ; =8192
; CHECK-NEXT:    tbl.16b v5, { v4 }, v0
; CHECK-NEXT:    tbl.16b v6, { v4 }, v1
; CHECK-NEXT:    tbl.16b v7, { v4 }, v2
; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
; CHECK-NEXT:    ucvtf.2d v5, v5
; CHECK-NEXT:    ucvtf.2d v6, v6
; CHECK-NEXT:    ucvtf.2d v7, v7
; CHECK-NEXT:    ucvtf.2d v4, v4
; CHECK-NEXT:    stp q6, q5, [x9, #32]
; CHECK-NEXT:    stp q4, q7, [x9]
; CHECK-NEXT:    b.ne LBB10_1
; CHECK-NEXT:  ; %bb.2: ; %for.cond.cleanup
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh26, Lloh29
; CHECK-NEXT:    .loh AdrpLdr Lloh24, Lloh28
; CHECK-NEXT:    .loh AdrpLdr Lloh23, Lloh27
; CHECK-NEXT:    .loh AdrpAdrp Lloh22, Lloh26
; CHECK-NEXT:    .loh AdrpLdr Lloh22, Lloh25
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  ; Source byte offset is %index * 8 (64 bytes further each iteration).
  %.idx = shl nsw i64 %index, 3
  %g = getelementptr inbounds i8, ptr %x, i64 %.idx
  %wide.vec = load <8 x i16>, ptr %g, align 2
  ; The conversion under test: unsigned i16 lanes to double lanes.
  %u = uitofp <8 x i16> %wide.vec to <8 x double>
  %h = getelementptr inbounds double, ptr %y, i64 %index
  store <8 x double> %u, ptr %h, align 8
  %index.next = add nuw i64 %index, 8
  %c = icmp eq i64 %index.next, 1024
  br i1 %c, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
714
+
715
; uitofp on four stride-4 de-interleaved <8 x i16> slices of a <32 x i16> load.
;
; Each iteration loads <32 x i16> (64 bytes) from %x at byte offset %index * 8
; and splits it with four shufflevectors that pick every fourth lane, starting
; at lanes 0, 1, 2 and 3 respectively. Each slice is converted to <8 x double>
; with uitofp, the four results are summed with `fadd fast`, and the sum is
; stored to %y at %index doubles. %index advances by 8 and the loop exits when
; it reaches 1024, i.e. after 128 iterations. %n is not used by the body.
;
; Expected lowering: four index vectors are loaded from lCPI11_0..lCPI11_3
; ahead of the loop. Inside the loop two ldp instructions fetch the 64 input
; bytes as four q registers, each of which is run through tbl.16b with each of
; the four index vectors (16 lookups). These are followed by 16 ucvtf.2d,
; 12 fadd.2d and two stp.
;
; NOTE(review): the assertions below look like update_llc_test_checks.py
; output. Regenerate them instead of hand-editing (confirm against the RUN
; lines and header at the top of the file).
define void @uitofp_ld4_v32i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
; CHECK-LABEL: uitofp_ld4_v32i16_to_v8f64:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:  Lloh30:
; CHECK-NEXT:    adrp x8, lCPI11_0@PAGE
; CHECK-NEXT:  Lloh31:
; CHECK-NEXT:    adrp x9, lCPI11_1@PAGE
; CHECK-NEXT:  Lloh32:
; CHECK-NEXT:    adrp x10, lCPI11_2@PAGE
; CHECK-NEXT:  Lloh33:
; CHECK-NEXT:    ldr q0, [x8, lCPI11_0@PAGEOFF]
; CHECK-NEXT:  Lloh34:
; CHECK-NEXT:    adrp x8, lCPI11_3@PAGE
; CHECK-NEXT:  Lloh35:
; CHECK-NEXT:    ldr q1, [x9, lCPI11_1@PAGEOFF]
; CHECK-NEXT:  Lloh36:
; CHECK-NEXT:    ldr q2, [x10, lCPI11_2@PAGEOFF]
; CHECK-NEXT:  Lloh37:
; CHECK-NEXT:    ldr q3, [x8, lCPI11_3@PAGEOFF]
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  LBB11_1: ; %vector.body
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    add x9, x0, x8
; CHECK-NEXT:    ldp q5, q4, [x9, #32]
; CHECK-NEXT:    ldp q7, q6, [x9]
; CHECK-NEXT:    add x9, x1, x8
; CHECK-NEXT:    add x8, x8, #64
; CHECK-NEXT:    tbl.16b v16, { v4 }, v0
; CHECK-NEXT:    tbl.16b v17, { v5 }, v0
; CHECK-NEXT:    tbl.16b v21, { v4 }, v1
; CHECK-NEXT:    tbl.16b v18, { v6 }, v0
; CHECK-NEXT:    tbl.16b v19, { v7 }, v0
; CHECK-NEXT:    tbl.16b v20, { v7 }, v1
; CHECK-NEXT:    tbl.16b v22, { v5 }, v1
; CHECK-NEXT:    tbl.16b v23, { v5 }, v2
; CHECK-NEXT:    tbl.16b v24, { v4 }, v2
; CHECK-NEXT:    tbl.16b v25, { v7 }, v2
; CHECK-NEXT:    tbl.16b v5, { v5 }, v3
; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
; CHECK-NEXT:    tbl.16b v7, { v7 }, v3
; CHECK-NEXT:    tbl.16b v26, { v6 }, v1
; CHECK-NEXT:    tbl.16b v27, { v6 }, v2
; CHECK-NEXT:    tbl.16b v6, { v6 }, v3
; CHECK-NEXT:    ucvtf.2d v17, v17
; CHECK-NEXT:    ucvtf.2d v16, v16
; CHECK-NEXT:    ucvtf.2d v19, v19
; CHECK-NEXT:    ucvtf.2d v18, v18
; CHECK-NEXT:    ucvtf.2d v22, v22
; CHECK-NEXT:    ucvtf.2d v23, v23
; CHECK-NEXT:    ucvtf.2d v5, v5
; CHECK-NEXT:    ucvtf.2d v21, v21
; CHECK-NEXT:    ucvtf.2d v24, v24
; CHECK-NEXT:    ucvtf.2d v4, v4
; CHECK-NEXT:    cmp x8, #2, lsl #12 ; =8192
; CHECK-NEXT:    ucvtf.2d v20, v20
; CHECK-NEXT:    ucvtf.2d v25, v25
; CHECK-NEXT:    ucvtf.2d v7, v7
; CHECK-NEXT:    ucvtf.2d v26, v26
; CHECK-NEXT:    ucvtf.2d v27, v27
; CHECK-NEXT:    ucvtf.2d v6, v6
; CHECK-NEXT:    fadd.2d v17, v22, v17
; CHECK-NEXT:    fadd.2d v5, v23, v5
; CHECK-NEXT:    fadd.2d v16, v21, v16
; CHECK-NEXT:    fadd.2d v4, v24, v4
; CHECK-NEXT:    fadd.2d v19, v20, v19
; CHECK-NEXT:    fadd.2d v7, v25, v7
; CHECK-NEXT:    fadd.2d v18, v26, v18
; CHECK-NEXT:    fadd.2d v6, v27, v6
; CHECK-NEXT:    fadd.2d v5, v17, v5
; CHECK-NEXT:    fadd.2d v4, v16, v4
; CHECK-NEXT:    fadd.2d v7, v19, v7
; CHECK-NEXT:    fadd.2d v6, v18, v6
; CHECK-NEXT:    stp q5, q4, [x9, #32]
; CHECK-NEXT:    stp q7, q6, [x9]
; CHECK-NEXT:    b.ne LBB11_1
; CHECK-NEXT:  ; %bb.2: ; %for.cond.cleanup
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh34, Lloh37
; CHECK-NEXT:    .loh AdrpLdr Lloh32, Lloh36
; CHECK-NEXT:    .loh AdrpLdr Lloh31, Lloh35
; CHECK-NEXT:    .loh AdrpAdrp Lloh30, Lloh34
; CHECK-NEXT:    .loh AdrpLdr Lloh30, Lloh33
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  ; Source byte offset is %index * 8 (64 bytes further each iteration).
  %.idx = shl nsw i64 %index, 3
  %0 = getelementptr inbounds i8, ptr %x, i64 %.idx
  %wide.vec = load <32 x i16>, ptr %0, align 2
  ; De-interleave: slice k takes lanes k, k+4, k+8, ..., k+28 of the wide load.
  %strided.vec = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %strided.vec36 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %strided.vec37 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %strided.vec38 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  ; Convert each slice and accumulate: ((slice1 + slice0) + slice2) + slice3.
  %1 = uitofp <8 x i16> %strided.vec to <8 x double>
  %2 = uitofp <8 x i16> %strided.vec36 to <8 x double>
  %3 = fadd fast <8 x double> %2, %1
  %4 = uitofp <8 x i16> %strided.vec37 to <8 x double>
  %5 = fadd fast <8 x double> %3, %4
  %6 = uitofp <8 x i16> %strided.vec38 to <8 x double>
  %7 = fadd fast <8 x double> %5, %6
  %8 = getelementptr inbounds double, ptr %y, i64 %index
  store <8 x double> %7, ptr %8, align 8
  %index.next = add nuw i64 %index, 8
  %9 = icmp eq i64 %index.next, 1024
  br i1 %9, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
825
+
0 commit comments