Skip to content

[Clang] Make sdot builtins available to SME #77792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2039,13 +2039,13 @@ let TargetGuard = "sve2p1|sme2" in {
defm STNT1 : MultiVecStore<"stnt1">;
}

let TargetGuard = "sve2p1" in {
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [], []>;
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>;
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [], []>;
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
let TargetGuard = "sve2p1|sme2" in {
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [IsStreamingOrSVE2p1], []>;
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [IsStreamingOrSVE2p1], []>;
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [IsStreamingOrSVE2p1], []>;
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_3>]>;
}

let TargetGuard = "sve2p1|sme2" in {
Expand Down
21 changes: 15 additions & 6 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>

#ifndef TEST_SME2
#define ATTR
#else
#define ATTR __arm_streaming
#endif

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
Expand All @@ -24,7 +33,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot,_s32_s16,)(op1, op2, op3);
}
Expand All @@ -39,7 +48,7 @@ svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot,_u32_u16,)(op1, op2, op3);
}
Expand All @@ -54,7 +63,7 @@ svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot,_f32_f16,)(op1, op2, op3);
}
Expand All @@ -71,7 +80,7 @@ svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot_lane,_s32_s16,)(op1, op2, op3, 3);
}
Expand All @@ -86,7 +95,7 @@ svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot_lane,_u32_u16,)(op1, op2, op3, 3);
}
Expand All @@ -101,7 +110,7 @@ svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svdot_lane_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
svfloat32_t test_svdot_lane_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) ATTR
{
return SVE_ACLE_FUNC(svdot_lane,_f32_f16,)(op1, op2, op3, 3);
}
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dots.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s

define <vscale x 4 x i32> @sdot_x2(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: sdot_x2:
Expand Down