
Commit 1593bf4

tclin914 and circYuan authored
[RISCV] Support LLVM IR intrinsics for XAndesVDot (#140223)

This patch adds LLVM IR intrinsic support for XAndesVDot, similar to #139860. The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot
and, with policy variants, at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot

The clang part will be added in a later patch.

---------

Co-authored-by: Tony Chuan-Yue Yuan <[email protected]>
1 parent e3e949c commit 1593bf4
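
XAndesVDot adds quad-widening integer dot-product instructions: each destination element accumulates the dot product of four quarter-width source elements (8-bit into 32-bit, or 16-bit into 64-bit, per the type list added below). As a reading aid, here is a hypothetical scalar model of a single e32 element of nds.vd4dots, assuming the signed-times-signed semantics described in the linked Andes documentation; nds.vd4dotu would use zext for both operands, and nds.vd4dotsu sext for the first operand and zext for the second. This sketch is not part of the commit.

; Hypothetical scalar model of one e32 destination element of nds.vd4dots
; (illustration only; names are made up).
define i32 @vd4dots_e32_element_model(i32 %acc,
                                      i8 %a0, i8 %a1, i8 %a2, i8 %a3,
                                      i8 %b0, i8 %b1, i8 %b2, i8 %b3) {
entry:
  ; Sign-extend each 8-bit lane and form the four products.
  %sa0 = sext i8 %a0 to i32
  %sb0 = sext i8 %b0 to i32
  %p0  = mul i32 %sa0, %sb0
  %sa1 = sext i8 %a1 to i32
  %sb1 = sext i8 %b1 to i32
  %p1  = mul i32 %sa1, %sb1
  %sa2 = sext i8 %a2 to i32
  %sb2 = sext i8 %b2 to i32
  %p2  = mul i32 %sa2, %sb2
  %sa3 = sext i8 %a3 to i32
  %sb3 = sext i8 %b3 to i32
  %p3  = mul i32 %sa3, %sb3
  ; Sum the products and accumulate into the destination element.
  %s01 = add i32 %p0, %p1
  %s23 = add i32 %p2, %p3
  %dot = add i32 %s01, %s23
  %r   = add i32 %acc, %dot
  ret i32 %r
}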

File tree

5 files changed: +912 −0 lines changed


llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td

Lines changed: 5 additions & 0 deletions

@@ -14,4 +14,9 @@ let TargetPrefix = "riscv" in {
   // Andes Vector Packed FP16 Extension
   defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
   defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
+
+  // Andes Vector Dot Product Extension
+  defm nds_vd4dots  : RISCVTernaryWide;
+  defm nds_vd4dotu  : RISCVTernaryWide;
+  defm nds_vd4dotsu : RISCVTernaryWide;
 }
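
For reference, these RISCVTernaryWide definitions produce intrinsics with a wide accumulator operand, two narrow source operands, and trailing vl and policy operands; the masked variants take an additional mask operand before vl. The prototypes below follow the shapes exercised by the tests in this commit (iXLen stands for the target's XLEN integer type; the tests substitute i32 or i64 via sed):

; Unmasked form, at the smallest supported type combination:
declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
  <vscale x 1 x i32>,  ; vd: wide accumulator / passthru
  <vscale x 4 x i8>,   ; vs1
  <vscale x 4 x i8>,   ; vs2
  iXLen,               ; vl
  iXLen)               ; policy

; Masked form, with the mask inserted before the vl operand:
declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
  <vscale x 1 x i32>,  ; vd: wide accumulator / passthru
  <vscale x 4 x i8>,   ; vs1
  <vscale x 4 x i8>,   ; vs2
  <vscale x 1 x i1>,   ; mask (v0)
  iXLen,               ; vl
  iXLen)               ; policy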

llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td

Lines changed: 43 additions & 0 deletions

@@ -388,6 +388,27 @@ multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
   }
 }
 
+multiclass VPseudoVD4DOT_VV {
+  foreach m = [V_MF2, V_M1, V_M2, V_M4, V_M8] in {
+    defm "" : VPseudoBinaryV_VV<m>,
+              SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+                          forcePassthruRead=true>;
+  }
+}
+
+multiclass VPatTernaryVD4DOT_VV<string intrinsic, string instruction,
+                                list<VTypeInfoToWide> vtilist> {
+  foreach vtiToWti = vtilist in {
+    defvar vti = vtiToWti.Vti;
+    defvar wti = vtiToWti.Wti;
+    let Predicates = GetVTypePredicates<wti>.Predicates in
+    defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+                                 wti.Vector, vti.Vector, vti.Vector,
+                                 wti.Mask, wti.Log2SEW, vti.LMul,
+                                 wti.RegClass, vti.RegClass, vti.RegClass>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // XAndesPerf
 //===----------------------------------------------------------------------===//
@@ -503,3 +524,25 @@ defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM;
 
 defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>;
 defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>;
+
+let Predicates = [HasVendorXAndesVDot] in {
+  defm PseudoNDS_VD4DOTS  : VPseudoVD4DOT_VV;
+  defm PseudoNDS_VD4DOTU  : VPseudoVD4DOT_VV;
+  defm PseudoNDS_VD4DOTSU : VPseudoVD4DOT_VV;
+}
+
+defset list<VTypeInfoToWide> AllQuadWidenableVD4DOTVectors = {
+  def : VTypeInfoToWide<VI8MF2, VI32MF2>;
+  def : VTypeInfoToWide<VI8M1, VI32M1>;
+  def : VTypeInfoToWide<VI8M2, VI32M2>;
+  def : VTypeInfoToWide<VI8M4, VI32M4>;
+  def : VTypeInfoToWide<VI8M8, VI32M8>;
+  def : VTypeInfoToWide<VI16M1, VI64M1>;
+  def : VTypeInfoToWide<VI16M2, VI64M2>;
+  def : VTypeInfoToWide<VI16M4, VI64M4>;
+  def : VTypeInfoToWide<VI16M8, VI64M8>;
+}
+
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dots", "PseudoNDS_VD4DOTS", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotu", "PseudoNDS_VD4DOTU", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotsu", "PseudoNDS_VD4DOTSU", AllQuadWidenableVD4DOTVectors>;
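
End to end, a call to one of the new intrinsics selects the pseudo, which emits the vendor instruction. The iXLen 2 policy operand used throughout the tests encodes tail-undisturbed, mask-agnostic, matching the tu, ma in the expected vsetvli. A minimal sketch mirroring the first test in the new file below (compile with llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot, as in its RUN lines):

; Expected codegen, per the autogenerated checks below:
;   vsetvli zero, a0, e32, mf2, tu, ma
;   nds.vd4dots.vv v8, v9, v10
define <vscale x 1 x i32> @vd4dots_sketch(<vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b, i64 %vl) {
entry:
  %r = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
    <vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b,
    i64 %vl, i64 2)
  ret <vscale x 1 x i32> %r
}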
Lines changed: 288 additions & 0 deletions

@@ -0,0 +1,288 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvdot \
; RUN:   -verify-machineinstrs -target-abi=ilp32 | FileCheck %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot \
; RUN:   -verify-machineinstrs -target-abi=lp64 | FileCheck %s

define <vscale x 1 x i32> @intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
    <vscale x 1 x i32> %0,
    <vscale x 4 x i8> %1,
    <vscale x 4 x i8> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 1 x i32> %a
}

define <vscale x 1 x i32> @intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
    <vscale x 1 x i32> %0,
    <vscale x 4 x i8> %1,
    <vscale x 4 x i8> %2,
    <vscale x 1 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 1 x i32> %a
}

define <vscale x 2 x i32> @intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8(
    <vscale x 2 x i32> %0,
    <vscale x 8 x i8> %1,
    <vscale x 8 x i8> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 2 x i32> %a
}

define <vscale x 2 x i32> @intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8(
    <vscale x 2 x i32> %0,
    <vscale x 8 x i8> %1,
    <vscale x 8 x i8> %2,
    <vscale x 2 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 2 x i32> %a
}

define <vscale x 4 x i32> @intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8(
    <vscale x 4 x i32> %0,
    <vscale x 16 x i8> %1,
    <vscale x 16 x i8> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 4 x i32> %a
}

define <vscale x 4 x i32> @intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8(
    <vscale x 4 x i32> %0,
    <vscale x 16 x i8> %1,
    <vscale x 16 x i8> %2,
    <vscale x 4 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 4 x i32> %a
}

define <vscale x 8 x i32> @intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8(
    <vscale x 8 x i32> %0,
    <vscale x 32 x i8> %1,
    <vscale x 32 x i8> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 8 x i32> %a
}

define <vscale x 8 x i32> @intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8(
    <vscale x 8 x i32> %0,
    <vscale x 32 x i8> %1,
    <vscale x 32 x i8> %2,
    <vscale x 8 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 8 x i32> %a
}

define <vscale x 16 x i32> @intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vl8r.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8(
    <vscale x 16 x i32> %0,
    <vscale x 64 x i8> %1,
    <vscale x 64 x i8> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 16 x i32> %a
}

define <vscale x 16 x i32> @intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vl8r.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8(
    <vscale x 16 x i32> %0,
    <vscale x 64 x i8> %1,
    <vscale x 64 x i8> %2,
    <vscale x 16 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 16 x i32> %a
}

define <vscale x 1 x i64> @intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16(
    <vscale x 1 x i64> %0,
    <vscale x 4 x i16> %1,
    <vscale x 4 x i16> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 1 x i64> %a
}

define <vscale x 1 x i64> @intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16(
    <vscale x 1 x i64> %0,
    <vscale x 4 x i16> %1,
    <vscale x 4 x i16> %2,
    <vscale x 1 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 1 x i64> %a
}

define <vscale x 2 x i64> @intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16(
    <vscale x 2 x i64> %0,
    <vscale x 8 x i16> %1,
    <vscale x 8 x i16> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 2 x i64> %a
}

define <vscale x 2 x i64> @intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16(
    <vscale x 2 x i64> %0,
    <vscale x 8 x i16> %1,
    <vscale x 8 x i16> %2,
    <vscale x 2 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 2 x i64> %a
}

define <vscale x 4 x i64> @intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16(
    <vscale x 4 x i64> %0,
    <vscale x 16 x i16> %1,
    <vscale x 16 x i16> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 4 x i64> %a
}

define <vscale x 4 x i64> @intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16(
    <vscale x 4 x i64> %0,
    <vscale x 16 x i16> %1,
    <vscale x 16 x i16> %2,
    <vscale x 4 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 4 x i64> %a
}

define <vscale x 8 x i64> @intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16(
    <vscale x 8 x i64> %0,
    <vscale x 32 x i16> %1,
    <vscale x 32 x i16> %2,
    iXLen %3, iXLen 2)
  ret <vscale x 8 x i64> %a
}

define <vscale x 8 x i64> @intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24, v0.t
; CHECK-NEXT:    ret
entry:
  %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16(
    <vscale x 8 x i64> %0,
    <vscale x 32 x i16> %1,
    <vscale x 32 x i16> %2,
    <vscale x 8 x i1> %3,
    iXLen %4, iXLen 2)
  ret <vscale x 8 x i64> %a
}
