Skip to content

Commit 200a925

Browse files
[Clang][SVE2.1] Add builtins and intrinsics for SVBFMLSLB/T
As described in: ARM-software/acle#257 Patch by: Kerry McLaughlin <[email protected]> Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D151461
1 parent 6cfb642 commit 200a925

File tree

7 files changed

+164
-4
lines changed

7 files changed

+164
-4
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,6 +1959,12 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_
19591959
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
19601960
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
19611961
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
1962+
1963+
def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
1964+
def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;
1965+
1966+
def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
1967+
def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
19621968
}
19631969

19641970
let TargetGuard = "sve2p1" in {

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
// O: svfloat16_t
100100
// M: svfloat32_t
101101
// N: svfloat64_t
102+
// $: svbfloat16_t
102103

103104
// J: Prefetch type (sv_prfop)
104105

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
9+
10+
#include <arm_sve.h>
11+
12+
#ifdef SVE_OVERLOADED_FORMS
13+
// A simple used,unused... macro, long enough to represent any SVE builtin.
14+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
15+
#else
16+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
17+
#endif
18+
19+
// BFMLSLB
20+
21+
22+
// CHECK-LABEL: @test_bfmlslb(
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
25+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
26+
//
27+
// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
28+
// CPP-CHECK-NEXT: entry:
29+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
30+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
31+
//
32+
svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
33+
{
34+
return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
35+
}
36+
37+
38+
// CHECK-LABEL: @test_bfmlslb_lane(
39+
// CHECK-NEXT: entry:
40+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
41+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
42+
//
43+
// CPP-CHECK-LABEL: @_Z17test_bfmlslb_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
44+
// CPP-CHECK-NEXT: entry:
45+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
46+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
47+
//
48+
svfloat32_t test_bfmlslb_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
49+
{
50+
return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
51+
}
52+
53+
// BFMLSLT
54+
55+
56+
// CHECK-LABEL: @test_bfmlslt(
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
59+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
60+
//
61+
// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
62+
// CPP-CHECK-NEXT: entry:
63+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
64+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
65+
//
66+
svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
67+
{
68+
return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
69+
}
70+
71+
72+
// CHECK-LABEL: @test_bfmlslt_lane(
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
75+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
76+
//
77+
// CPP-CHECK-LABEL: @_Z17test_bfmlslt_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
78+
// CPP-CHECK-NEXT: entry:
79+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
80+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
81+
//
82+
svfloat32_t test_bfmlslt_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
83+
{
84+
return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
85+
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,13 @@ void SVEType::applyModifier(char Mod) {
852852
NumVectors = 0;
853853
Signed = false;
854854
break;
855+
case '$':
856+
Predicate = false;
857+
Svcount = false;
858+
Float = false;
859+
BFloat = true;
860+
ElementBitwidth = 16;
861+
break;
855862
case '}':
856863
Predicate = false;
857864
Signed = true;

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,6 +3011,16 @@ let TargetPrefix = "aarch64" in {
30113011
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
30123012
[IntrNoMem]>;
30133013

3014+
class SME2_BFMLS_Intrinsic
3015+
: DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
3016+
[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
3017+
[IntrNoMem]>;
3018+
3019+
class SME2_BFMLS_Lane_Intrinsic
3020+
: DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
3021+
[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
3022+
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
3023+
30143024
class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
30153025
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
30163026
[llvm_i32_ty],
@@ -3214,6 +3224,12 @@ let TargetPrefix = "aarch64" in {
32143224
def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
32153225
def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
32163226

3227+
def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
3228+
def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;
3229+
3230+
def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
3231+
def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
3232+
32173233
// Multi-vector signed saturating doubling multiply high
32183234

32193235
def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3753,12 +3753,14 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
37533753

37543754
let Predicates = [HasSVE2p1_or_HasSME2] in {
37553755
defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
3756+
37563757
defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
37573758
defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
3758-
def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
3759-
def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
3760-
def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
3761-
def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
3759+
3760+
defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
3761+
defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>;
3762+
defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>;
3763+
defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>;
37623764

37633765
defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
37643766
defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
3+
4+
define <vscale x 4 x float> @bfmlslb_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
5+
; CHECK-LABEL: bfmlslb_f32:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: bfmlslb z0.s, z1.h, z2.h
8+
; CHECK-NEXT: ret
9+
%out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
10+
ret <vscale x 4 x float> %out
11+
}
12+
13+
define <vscale x 4 x float> @bfmlslt_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
14+
; CHECK-LABEL: bfmlslt_f32:
15+
; CHECK: // %bb.0:
16+
; CHECK-NEXT: bfmlslt z0.s, z1.h, z2.h
17+
; CHECK-NEXT: ret
18+
%out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
19+
ret <vscale x 4 x float> %out
20+
}
21+
22+
define <vscale x 4 x float> @bfmlslb_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
23+
; CHECK-LABEL: bfmlslb_lane_f32:
24+
; CHECK: // %bb.0:
25+
; CHECK-NEXT: bfmlslb z0.s, z1.h, z2.h[7]
26+
; CHECK-NEXT: ret
27+
%out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
28+
ret <vscale x 4 x float> %out
29+
}
30+
31+
define <vscale x 4 x float> @bfmlslt_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
32+
; CHECK-LABEL: bfmlslt_lane_f32:
33+
; CHECK: // %bb.0:
34+
; CHECK-NEXT: bfmlslt z0.s, z1.h, z2.h[7]
35+
; CHECK-NEXT: ret
36+
%out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
37+
ret <vscale x 4 x float> %out
38+
}
39+
40+
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
41+
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
42+
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
43+
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

0 commit comments

Comments
 (0)