Skip to content

Commit 3d7e56f

Browse files
authored
[AArch64][clang][llvm] Add structured sparsity outer product (TMOP) intrinsics (llvm#135145)
Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files
1 parent b9ce185 commit 3d7e56f

File tree

11 files changed

+605
-71
lines changed

11 files changed

+605
-71
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,33 @@ let SMETargetGuard = "sme-f16f16" in {
907907
}
908908

909909

910+
////////////////////////////////////////////////////////////////////////////////
911+
// SME2 - TMOP, SUTMOP, USTMOP
912+
913+
let SMETargetGuard = "sme2,sme-tmop" in {
914+
def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
915+
def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
916+
def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
917+
def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
918+
def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_ustmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
919+
}
920+
921+
let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in {
922+
def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
923+
}
924+
925+
let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in {
926+
def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
927+
}
928+
929+
let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in {
930+
def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
931+
}
932+
933+
let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in {
934+
def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
935+
}
936+
910937
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
911938
let SMETargetGuard = "sme2p1" in {
912939
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
9+
10+
#include <arm_sme.h>
11+
12+
#ifdef SME_OVERLOADED_FORMS
13+
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
14+
#else
15+
#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
16+
#endif
17+
18+
// CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8(
19+
// CHECK-NEXT: entry:
20+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
21+
// CHECK-NEXT: ret void
22+
//
23+
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_s810svint8x2_tu10__SVInt8_tu11__SVUint8_t(
24+
// CPP-CHECK-NEXT: entry:
25+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
26+
// CPP-CHECK-NEXT: ret void
27+
//
28+
void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
29+
SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3);
30+
}
31+
32+
// CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8(
33+
// CHECK-NEXT: entry:
34+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
35+
// CHECK-NEXT: ret void
36+
//
37+
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_u811svuint8x2_tu11__SVUint8_tS0_(
38+
// CPP-CHECK-NEXT: entry:
39+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
40+
// CPP-CHECK-NEXT: ret void
41+
//
42+
void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
43+
SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3);
44+
}
45+
46+
// CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8(
47+
// CHECK-NEXT: entry:
48+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
49+
// CHECK-NEXT: ret void
50+
//
51+
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_u810svint8x2_tu11__SVUint8_tS0_(
52+
// CPP-CHECK-NEXT: entry:
53+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
54+
// CPP-CHECK-NEXT: ret void
55+
//
56+
void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
57+
SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3);
58+
}
59+
60+
// CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8(
61+
// CHECK-NEXT: entry:
62+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
63+
// CHECK-NEXT: ret void
64+
//
65+
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_s811svuint8x2_tu10__SVInt8_tu11__SVUint8_t(
66+
// CPP-CHECK-NEXT: entry:
67+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
68+
// CPP-CHECK-NEXT: ret void
69+
//
70+
void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
71+
SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3);
72+
}
73+
74+
// CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16(
75+
// CHECK-NEXT: entry:
76+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
77+
// CHECK-NEXT: ret void
78+
//
79+
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_s16_s1611svint16x2_tu11__SVInt16_tu11__SVUint8_t(
80+
// CPP-CHECK-NEXT: entry:
81+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
82+
// CPP-CHECK-NEXT: ret void
83+
//
84+
void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
85+
SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3);
86+
}
87+
88+
// CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16(
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
91+
// CHECK-NEXT: ret void
92+
//
93+
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_u16_u1612svuint16x2_tu12__SVUint16_tu11__SVUint8_t(
94+
// CPP-CHECK-NEXT: entry:
95+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
96+
// CPP-CHECK-NEXT: ret void
97+
//
98+
void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
99+
SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3);
100+
}
101+
102+
// CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16(
103+
// CHECK-NEXT: entry:
104+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
105+
// CHECK-NEXT: ret void
106+
//
107+
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t(
108+
// CPP-CHECK-NEXT: entry:
109+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
110+
// CPP-CHECK-NEXT: ret void
111+
//
112+
void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
113+
SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3);
114+
}
115+
116+
// CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32(
117+
// CHECK-NEXT: entry:
118+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
119+
// CHECK-NEXT: ret void
120+
//
121+
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t(
122+
// CPP-CHECK-NEXT: entry:
123+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
124+
// CPP-CHECK-NEXT: ret void
125+
//
126+
void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
127+
SME_ACLE_FUNC(svtmopa_lane_za32,_f32_f32,)(1, zn, zm, zk, 3);
128+
}
129+
130+
// CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16(
131+
// CHECK-NEXT: entry:
132+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
133+
// CHECK-NEXT: ret void
134+
//
135+
// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t(
136+
// CPP-CHECK-NEXT: entry:
137+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
138+
// CPP-CHECK-NEXT: ret void
139+
//
140+
void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
141+
SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3);
142+
}
143+
144+
// CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16(
145+
// CHECK-NEXT: entry:
146+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
147+
// CHECK-NEXT: ret void
148+
//
149+
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t(
150+
// CPP-CHECK-NEXT: entry:
151+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
152+
// CPP-CHECK-NEXT: ret void
153+
//
154+
void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
155+
SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3);
156+
}
157+
158+
// CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16(
159+
// CHECK-NEXT: entry:
160+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
161+
// CHECK-NEXT: ret void
162+
//
163+
// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t(
164+
// CPP-CHECK-NEXT: entry:
165+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
166+
// CPP-CHECK-NEXT: ret void
167+
//
168+
void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
169+
SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3);
170+
}
171+
172+
// CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm(
173+
// CHECK-NEXT: entry:
174+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
175+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
176+
// CHECK-NEXT: ret void
177+
//
178+
// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
179+
// CPP-CHECK-NEXT: entry:
180+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
181+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
182+
// CPP-CHECK-NEXT: ret void
183+
//
184+
void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
185+
SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
186+
}
187+
188+
// CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm(
189+
// CHECK-NEXT: entry:
190+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
191+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
192+
// CHECK-NEXT: ret void
193+
//
194+
// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
195+
// CPP-CHECK-NEXT: entry:
196+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
197+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
198+
// CPP-CHECK-NEXT: ret void
199+
//
200+
void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
201+
SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
202+
}

0 commit comments

Comments
 (0)