Skip to content

Commit 05e10ee

Browse files
committed
[AArch64][SVE2] Add bfloat16 support to whilerw/whilewr intrinsics
Reviewed By: fpetrogalli
Differential Revision: https://reviews.llvm.org/D82399
1 parent fd2c4b8 commit 05e10ee

File tree

5 files changed

+97
-0
lines changed

5 files changed

+97
-0
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,11 @@ def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sv
19621962
def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>;
19631963
}
19641964

1965+
let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in {
1966+
def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>;
1967+
def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>;
1968+
}
1969+
19651970
////////////////////////////////////////////////////////////////////////////////
19661971
// SVE2 - Extended table lookup/permute
19671972
let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7726,6 +7726,8 @@ CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) {
77267726
case SVETypeFlags::EltTyInt64:
77277727
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
77287728

7729+
case SVETypeFlags::EltTyBFloat16:
7730+
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
77297731
case SVETypeFlags::EltTyFloat16:
77307732
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
77317733
case SVETypeFlags::EltTyFloat32:
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
// Test expected warnings for implicit declaration when +sve2 is missing
5+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
6+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
7+
8+
// Test expected warnings for implicit declaration when __ARM_FEATURE_SVE_BF16 is not defined
9+
// NOTE: +bf16 doesn't currently imply __ARM_FEATURE_SVE_BF16; once the
10+
// implementation is complete it will, at which point -target-feature +bf16
11+
// should be removed.
12+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
13+
14+
// Test expected ambiguous call error for overloaded form when __ARM_FEATURE_SVE_BF16 is not defined
15+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload-bf16 -verify-ignore-unexpected=note %s
16+
17+
#include <arm_sve.h>
18+
19+
#ifdef SVE_OVERLOADED_FORMS
20+
// A simple used,unused... macro, long enough to represent any SVE builtin.
21+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
22+
#else
23+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
24+
#endif
25+
26+
svbool_t test_svwhilerw_bf16(const bfloat16_t *op1, const bfloat16_t *op2)
27+
{
28+
// CHECK-LABEL: test_svwhilerw_bf16
29+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0bf16(bfloat* %op1, bfloat* %op2)
30+
// CHECK: %[[INTRINSIC_REINT:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
31+
// CHECK: ret <vscale x 16 x i1> %[[INTRINSIC_REINT]]
32+
// overload-warning@+3 {{implicit declaration of function 'svwhilerw'}}
33+
// expected-warning@+2 {{implicit declaration of function 'svwhilerw_bf16'}}
34+
// overload-bf16-error@+1 {{call to 'svwhilerw' is ambiguous}}
35+
return SVE_ACLE_FUNC(svwhilerw,_bf16,,)(op1, op2);
36+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
// Test expected warnings for implicit declaration when +sve2 is missing
5+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
6+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
7+
8+
// Test expected warnings for implicit declaration when __ARM_FEATURE_SVE_BF16 is not defined
9+
// NOTE: +bf16 doesn't currently imply __ARM_FEATURE_SVE_BF16; once the
10+
// implementation is complete it will, at which point -target-feature +bf16
11+
// should be removed.
12+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
13+
14+
// Test expected ambiguous call error for overloaded form when __ARM_FEATURE_SVE_BF16 is not defined
15+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload-bf16 -verify-ignore-unexpected=note %s
16+
17+
#include <arm_sve.h>
18+
19+
#ifdef SVE_OVERLOADED_FORMS
20+
// A simple used,unused... macro, long enough to represent any SVE builtin.
21+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
22+
#else
23+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
24+
#endif
25+
26+
svbool_t test_svwhilewr_bf16(const bfloat16_t *op1, const bfloat16_t *op2)
27+
{
28+
// CHECK-LABEL: test_svwhilewr_bf16
29+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0bf16(bfloat* %op1, bfloat* %op2)
30+
// CHECK: %[[INTRINSIC_REINT:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
31+
// CHECK: ret <vscale x 16 x i1> %[[INTRINSIC_REINT]]
32+
// overload-warning@+3 {{implicit declaration of function 'svwhilewr'}}
33+
// expected-warning@+2 {{implicit declaration of function 'svwhilewr_bf16'}}
34+
// overload-bf16-error@+1 {{call to 'svwhilewr' is ambiguous}}
35+
return SVE_ACLE_FUNC(svwhilewr,_bf16,,)(op1, op2);
36+
}

llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ define <vscale x 2 x i1> @whilerw_i64(i64* %a, i64* %b) {
3636
ret <vscale x 2 x i1> %out
3737
}
3838

39+
define <vscale x 8 x i1> @whilerw_bfloat(bfloat* %a, bfloat* %b) {
40+
; CHECK-LABEL: whilerw_bfloat:
41+
; CHECK: whilerw p0.h, x0, x1
42+
; CHECK-NEXT: ret
43+
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
44+
ret <vscale x 8 x i1> %out
45+
}
46+
3947
define <vscale x 8 x i1> @whilerw_half(half* %a, half* %b) {
4048
; CHECK-LABEL: whilerw_half:
4149
; CHECK: whilerw p0.h, x0, x1
@@ -96,6 +104,14 @@ define <vscale x 2 x i1> @whilewr_i64(i64* %a, i64* %b) {
96104
ret <vscale x 2 x i1> %out
97105
}
98106

107+
define <vscale x 8 x i1> @whilewr_bfloat(bfloat* %a, bfloat* %b) {
108+
; CHECK-LABEL: whilewr_bfloat:
109+
; CHECK: whilewr p0.h, x0, x1
110+
; CHECK-NEXT: ret
111+
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
112+
ret <vscale x 8 x i1> %out
113+
}
114+
99115
define <vscale x 8 x i1> @whilewr_half(half* %a, half* %b) {
100116
; CHECK-LABEL: whilewr_half:
101117
; CHECK: whilewr p0.h, x0, x1
@@ -125,6 +141,7 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1(i16* %a, i16* %b)
125141
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1(i32* %a, i32* %b)
126142
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1(i64* %a, i64* %b)
127143

144+
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
128145
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.f16.f16(half* %a, half* %b)
129146
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1.f32.f32(float* %a, float* %b)
130147
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1.f64.f64(double* %a, double* %b)
@@ -134,6 +151,7 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1(i16* %a, i16* %b)
134151
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1(i32* %a, i32* %b)
135152
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1(i64* %a, i64* %b)
136153

154+
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
137155
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.f16.f16(half* %a, half* %b)
138156
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1.f32.f32(float* %a, float* %b)
139157
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1.f64.f64(double* %a, double* %b)

0 commit comments

Comments
 (0)