Skip to content

Commit 97f6261

Browse files
SC llvm team
authored and committed
Merged main:460e84398a19 into amd-gfx:0afc460c951d
Local branch amd-gfx 0afc460 Merged main:3e49ce6ea16e into amd-gfx:10c5c6439baf Remote branch main 460e843 [RISCV] Add getSameRatioLMUL (llvm#69570)
2 parents 0afc460 + 460e843 commit 97f6261

File tree

201 files changed

+2309
-1284
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

201 files changed

+2309
-1284
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1952,6 +1952,19 @@ def SVSTNT1B_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructS
19521952
def SVSTNT1H_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
19531953
def SVSTNT1W_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
19541954
def SVSTNT1D_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
1955+
1956+
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [], []>;
1957+
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>;
1958+
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [], []>;
1959+
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
1960+
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
1961+
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
1962+
1963+
def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
1964+
def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;
1965+
1966+
def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
1967+
def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
19551968
}
19561969

19571970
let TargetGuard = "sve2p1" in {

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
// O: svfloat16_t
100100
// M: svfloat32_t
101101
// N: svfloat64_t
102+
// $: svbfloat16_t
102103

103104
// J: Prefetch type (sv_prfop)
104105

clang/include/clang/Driver/Options.td

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ def pedantic_Group : OptionGroup<"<pedantic group>">, Group<f_Group>,
148148
DocFlatten;
149149

150150
def offload_Group : OptionGroup<"<offload group>">, Group<f_Group>,
151-
DocName<"Common Offloading options">;
151+
DocName<"Common Offloading options">,
152+
Visibility<[ClangOption, CLOption]>;
152153

153154
def opencl_Group : OptionGroup<"<opencl group>">, Group<f_Group>,
154155
DocName<"OpenCL options">;
@@ -157,13 +158,16 @@ def sycl_Group : OptionGroup<"<SYCL group>">, Group<f_Group>,
157158
DocName<"SYCL options">;
158159

159160
def cuda_Group : OptionGroup<"<CUDA group>">, Group<f_Group>,
160-
DocName<"CUDA options">;
161+
DocName<"CUDA options">,
162+
Visibility<[ClangOption, CLOption]>;
161163

162164
def hip_Group : OptionGroup<"<HIP group>">, Group<f_Group>,
163-
DocName<"HIP options">;
165+
DocName<"HIP options">,
166+
Visibility<[ClangOption, CLOption]>;
164167

165168
def m_Group : OptionGroup<"<m group>">, Group<CompileOnly_Group>,
166-
DocName<"Target-dependent compilation options">;
169+
DocName<"Target-dependent compilation options">,
170+
Visibility<[ClangOption, CLOption]>;
167171

168172
// Feature groups - these take command line options that correspond directly to
169173
// target specific features and can be translated directly from command line
@@ -5167,14 +5171,16 @@ def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules"
51675171
def prebind : Flag<["-"], "prebind">;
51685172
def preload : Flag<["-"], "preload">;
51695173
def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">,
5170-
HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">;
5174+
HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">,
5175+
Visibility<[ClangOption, CLOption]>;
51715176
def print_ivar_layout : Flag<["-"], "print-ivar-layout">,
51725177
Visibility<[ClangOption, CC1Option]>,
51735178
HelpText<"Enable Objective-C Ivar layout bitmap print trace">,
51745179
MarshallingInfoFlag<LangOpts<"ObjCGCBitmapPrint">>;
51755180
def print_libgcc_file_name : Flag<["-", "--"], "print-libgcc-file-name">,
51765181
HelpText<"Print the library path for the currently used compiler runtime "
5177-
"library (\"libgcc.a\" or \"libclang_rt.builtins.*.a\")">;
5182+
"library (\"libgcc.a\" or \"libclang_rt.builtins.*.a\")">,
5183+
Visibility<[ClangOption, CLOption]>;
51785184
def print_multi_directory : Flag<["-", "--"], "print-multi-directory">;
51795185
def print_multi_lib : Flag<["-", "--"], "print-multi-lib">;
51805186
def print_multi_flags : Flag<["-", "--"], "print-multi-flags-experimental">,
@@ -5183,27 +5189,34 @@ def print_multi_os_directory : Flag<["-", "--"], "print-multi-os-directory">,
51835189
Flags<[Unsupported]>;
51845190
def print_target_triple : Flag<["-", "--"], "print-target-triple">,
51855191
HelpText<"Print the normalized target triple">,
5186-
Visibility<[ClangOption, FlangOption]>;
5192+
Visibility<[ClangOption, FlangOption, CLOption]>;
51875193
def print_effective_triple : Flag<["-", "--"], "print-effective-triple">,
51885194
HelpText<"Print the effective target triple">,
5189-
Visibility<[ClangOption, FlangOption]>;
5195+
Visibility<[ClangOption, FlangOption, CLOption]>;
51905196
// GCC --disable-multiarch, GCC --enable-multiarch (upstream and Debian
51915197
// specific) have different behaviors. We choose not to support the option.
51925198
def : Flag<["-", "--"], "print-multiarch">, Flags<[Unsupported]>;
51935199
def print_prog_name_EQ : Joined<["-", "--"], "print-prog-name=">,
5194-
HelpText<"Print the full program path of <name>">, MetaVarName<"<name>">;
5200+
HelpText<"Print the full program path of <name>">, MetaVarName<"<name>">,
5201+
Visibility<[ClangOption, CLOption]>;
51955202
def print_resource_dir : Flag<["-", "--"], "print-resource-dir">,
5196-
HelpText<"Print the resource directory pathname">;
5203+
HelpText<"Print the resource directory pathname">,
5204+
Visibility<[ClangOption, CLOption]>;
51975205
def print_search_dirs : Flag<["-", "--"], "print-search-dirs">,
5198-
HelpText<"Print the paths used for finding libraries and programs">;
5206+
HelpText<"Print the paths used for finding libraries and programs">,
5207+
Visibility<[ClangOption, CLOption]>;
51995208
def print_targets : Flag<["-", "--"], "print-targets">,
5200-
HelpText<"Print the registered targets">;
5209+
HelpText<"Print the registered targets">,
5210+
Visibility<[ClangOption, CLOption]>;
52015211
def print_rocm_search_dirs : Flag<["-", "--"], "print-rocm-search-dirs">,
5202-
HelpText<"Print the paths used for finding ROCm installation">;
5212+
HelpText<"Print the paths used for finding ROCm installation">,
5213+
Visibility<[ClangOption, CLOption]>;
52035214
def print_runtime_dir : Flag<["-", "--"], "print-runtime-dir">,
5204-
HelpText<"Print the directory pathname containing clangs runtime libraries">;
5215+
HelpText<"Print the directory pathname containing clangs runtime libraries">,
5216+
Visibility<[ClangOption, CLOption]>;
52055217
def print_diagnostic_options : Flag<["-", "--"], "print-diagnostic-options">,
5206-
HelpText<"Print all of Clang's warning options">;
5218+
HelpText<"Print all of Clang's warning options">,
5219+
Visibility<[ClangOption, CLOption]>;
52075220
def private__bundle : Flag<["-"], "private_bundle">;
52085221
def pthreads : Flag<["-"], "pthreads">;
52095222
defm pthread : BoolOption<"", "pthread",
@@ -5230,7 +5243,7 @@ def resource_dir_EQ : Joined<["-"], "resource-dir=">, Flags<[NoXarchOption]>,
52305243
Visibility<[ClangOption, CLOption, DXCOption]>,
52315244
Alias<resource_dir>;
52325245
def rpath : Separate<["-"], "rpath">, Flags<[LinkerInput]>, Group<Link_Group>;
5233-
def rtlib_EQ : Joined<["-", "--"], "rtlib=">,
5246+
def rtlib_EQ : Joined<["-", "--"], "rtlib=">, Visibility<[ClangOption, CLOption]>,
52345247
HelpText<"Compiler runtime library to use">;
52355248
def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, Flags<[NoArgumentUnused]>,
52365249
HelpText<"Add -rpath with architecture-specific resource directory to the linker flags. "
@@ -5396,7 +5409,7 @@ def w : Flag<["-"], "w">, HelpText<"Suppress all warnings">,
53965409
MarshallingInfoFlag<DiagnosticOpts<"IgnoreWarnings">>;
53975410
def x : JoinedOrSeparate<["-"], "x">,
53985411
Flags<[NoXarchOption]>,
5399-
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
5412+
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option, CLOption]>,
54005413
HelpText<"Treat subsequent input files as having type <language>">,
54015414
MetaVarName<"<language>">;
54025415
def y : Joined<["-"], "y">;

clang/lib/Driver/Driver.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2589,8 +2589,11 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
25892589
Diag(clang::diag::note_drv_t_option_is_global);
25902590
}
25912591

2592+
// CUDA/HIP and their preprocessor expansions can be accepted by CL mode.
25922593
// Warn -x after last input file has no effect
2593-
if (!IsCLMode()) {
2594+
auto LastXArg = Args.getLastArgValue(options::OPT_x);
2595+
const llvm::StringSet<> ValidXArgs = {"cuda", "hip", "cui", "hipi"};
2596+
if (!IsCLMode() || ValidXArgs.find(LastXArg) != ValidXArgs.end()) {
25942597
Arg *LastXArg = Args.getLastArgNoClaim(options::OPT_x);
25952598
Arg *LastInputArg = Args.getLastArgNoClaim(options::OPT_INPUT);
25962599
if (LastXArg && LastInputArg &&
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
9+
10+
#include <arm_sve.h>
11+
12+
#ifdef SVE_OVERLOADED_FORMS
13+
// A simple used,unused... macro, long enough to represent any SVE builtin.
14+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
15+
#else
16+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
17+
#endif
18+
19+
// BFMLSLB
20+
21+
22+
// CHECK-LABEL: @test_bfmlslb(
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
25+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
26+
//
27+
// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
28+
// CPP-CHECK-NEXT: entry:
29+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
30+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
31+
//
32+
svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
33+
{
34+
return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
35+
}
36+
37+
38+
// CHECK-LABEL: @test_bfmlslb_lane(
39+
// CHECK-NEXT: entry:
40+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
41+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
42+
//
43+
// CPP-CHECK-LABEL: @_Z17test_bfmlslb_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
44+
// CPP-CHECK-NEXT: entry:
45+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
46+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
47+
//
48+
svfloat32_t test_bfmlslb_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
49+
{
50+
return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
51+
}
52+
53+
// BFMLSLT
54+
55+
56+
// CHECK-LABEL: @test_bfmlslt(
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
59+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
60+
//
61+
// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
62+
// CPP-CHECK-NEXT: entry:
63+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
64+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
65+
//
66+
svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
67+
{
68+
return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
69+
}
70+
71+
72+
// CHECK-LABEL: @test_bfmlslt_lane(
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
75+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
76+
//
77+
// CPP-CHECK-LABEL: @_Z17test_bfmlslt_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
78+
// CPP-CHECK-NEXT: entry:
79+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
80+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
81+
//
82+
svfloat32_t test_bfmlslt_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
83+
{
84+
return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
85+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
#include <arm_sve.h>
9+
10+
#ifdef SVE_OVERLOADED_FORMS
11+
// A simple used,unused... macro, long enough to represent any SVE builtin.
12+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
13+
#else
14+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
15+
#endif
16+
17+
// CHECK-LABEL: @test_svdot_s32_x2(
18+
// CHECK-NEXT: entry:
19+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
20+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
21+
//
22+
// CPP-CHECK-LABEL: @_Z17test_svdot_s32_x2u11__SVInt32_tu11__SVInt16_tu11__SVInt16_t(
23+
// CPP-CHECK-NEXT: entry:
24+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
25+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
26+
//
27+
svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
28+
{
29+
return SVE_ACLE_FUNC(svdot,_s32_s16_s16,)(op1, op2, op3);
30+
}
31+
32+
// CHECK-LABEL: @test_svdot_u32_x2(
33+
// CHECK-NEXT: entry:
34+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
35+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
36+
//
37+
// CPP-CHECK-LABEL: @_Z17test_svdot_u32_x2u12__SVUint32_tu12__SVUint16_tu12__SVUint16_t(
38+
// CPP-CHECK-NEXT: entry:
39+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
40+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
41+
//
42+
svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
43+
{
44+
return SVE_ACLE_FUNC(svdot,_u32_u16_u16,)(op1, op2, op3);
45+
}
46+
47+
// CHECK-LABEL: @test_svdot_f32_x2(
48+
// CHECK-NEXT: entry:
49+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
50+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
51+
//
52+
// CPP-CHECK-LABEL: @_Z17test_svdot_f32_x2u13__SVFloat32_tu13__SVFloat16_tu13__SVFloat16_t(
53+
// CPP-CHECK-NEXT: entry:
54+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
55+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
56+
//
57+
svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
58+
{
59+
return SVE_ACLE_FUNC(svdot,_f32_f16_f16,)(op1, op2, op3);
60+
}
61+
62+
63+
64+
// CHECK-LABEL: @test_svdot_lane_s32_x2(
65+
// CHECK-NEXT: entry:
66+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
67+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
68+
//
69+
// CPP-CHECK-LABEL: @_Z22test_svdot_lane_s32_x2u11__SVInt32_tu11__SVInt16_tu11__SVInt16_t(
70+
// CPP-CHECK-NEXT: entry:
71+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
72+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
73+
//
74+
svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
75+
{
76+
return SVE_ACLE_FUNC(svdot_lane,_s32_s16_s16,)(op1, op2, op3, 3);
77+
}
78+
79+
// CHECK-LABEL: @test_svdot_lane_u32_x2(
80+
// CHECK-NEXT: entry:
81+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
82+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
83+
//
84+
// CPP-CHECK-LABEL: @_Z22test_svdot_lane_u32_x2u12__SVUint32_tu12__SVUint16_tu12__SVUint16_t(
85+
// CPP-CHECK-NEXT: entry:
86+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
87+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
88+
//
89+
svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
90+
{
91+
return SVE_ACLE_FUNC(svdot_lane,_u32_u16_u16,)(op1, op2, op3, 3);
92+
}
93+
94+
// CHECK-LABEL: @test_svdot_lane_f32_x2(
95+
// CHECK-NEXT: entry:
96+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3)
97+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
98+
//
99+
// CPP-CHECK-LABEL: @_Z22test_svdot_lane_f32_x2u13__SVFloat32_tu13__SVFloat16_tu13__SVFloat16_t(
100+
// CPP-CHECK-NEXT: entry:
101+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3)
102+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
103+
//
104+
svfloat32_t test_svdot_lane_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
105+
{
106+
return SVE_ACLE_FUNC(svdot_lane,_f32_f16_f16,)(op1, op2, op3, 3);
107+
}

0 commit comments

Comments
 (0)