Skip to content

Commit 1a2147f

Browse files
committed
[VE] Add vsum and vfsum intrinsic instructions
Add vsum and vfsum intrinsic instructions and regression tests. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D92938
1 parent 6266f36 commit 1a2147f

File tree

4 files changed

+182
-0
lines changed

4 files changed

+182
-0
lines changed

llvm/include/llvm/IR/IntrinsicsVEVL.gen.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,3 +1084,13 @@ let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin
10841084
let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
10851085
let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
10861086
let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
1087+
// Vector sum intrinsics for the "ve" target prefix: vsumwsx, vsumwzx, vsuml,
// vfsumd and vfsums. Each one is defined in two forms:
//   *_vvl  - parameters (v256f64, i32)
//   *_vvml - parameters (v256f64, v256i1, i32)
// Every definition returns v256f64, is marked IntrNoMem, and is tied to the
// GCC builtin named __builtin_ve_vl_<intrinsic suffix>.
let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
1088+
let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
1089+
let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
1090+
let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
1091+
let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
1092+
let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
1093+
let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
1094+
let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
1095+
let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
1096+
let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;

llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,3 +1307,13 @@ def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN,
13071307
def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
13081308
def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>;
13091309
def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
1310+
// Selection patterns for the vsum/vfsum intrinsics. Each intrinsic maps onto
// the instruction with the matching upper-case name:
//   *_vvl  -> <INSN>vl  with operands ($vy, $vl)
//   *_vvml -> <INSN>vml with operands ($vy, $vm, $vl)
// Operands are forwarded unchanged and in the same order; $vy is v256f64,
// $vm is v256i1 and $vl is i32.
def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>;
1311+
def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
1312+
def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>;
1313+
def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
1314+
def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>;
1315+
def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
1316+
def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>;
1317+
def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
1318+
def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>;
1319+
def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
2+
3+
;;; Test vector floating sum intrinsic instructions
4+
;;;
5+
;;; Note:
6+
;;; We test VFSUM*vl and VFSUM*vml instructions.
7+
8+
; Function Attrs: nounwind readnone
; Test for the two-operand form llvm.ve.vl.vfsumd.vvl: the function passes %0
; and the constant i32 256 to the intrinsic and returns its result.
; The FileCheck lines expect 256 to be loaded into %s0 with lea, then
; "lvl %s0", a single "vfsum.d %v0, %v0", and the final "b.l.t (, %s10)".
; The intrinsic is declared right after the function body.
9+
define fastcc <256 x double> @vfsumd_vvl(<256 x double> %0) {
10+
; CHECK-LABEL: vfsumd_vvl:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: lea %s0, 256
13+
; CHECK-NEXT: lvl %s0
14+
; CHECK-NEXT: vfsum.d %v0, %v0
15+
; CHECK-NEXT: b.l.t (, %s10)
16+
%2 = tail call fast <256 x double> @llvm.ve.vl.vfsumd.vvl(<256 x double> %0, i32 256)
17+
ret <256 x double> %2
18+
}
19+
20+
; Function Attrs: nounwind readnone
21+
declare <256 x double> @llvm.ve.vl.vfsumd.vvl(<256 x double>, i32)
22+
23+
; Function Attrs: nounwind readnone
; Test for the three-operand form llvm.ve.vl.vfsumd.vvml: the function passes
; %0, the <256 x i1> argument %1 and the constant i32 256 to the intrinsic and
; returns its result.
; The FileCheck lines expect the same lea/lvl sequence as the two-operand
; test, followed by "vfsum.d %v0, %v0, %vm1" (the <256 x i1> argument shows up
; as %vm1) and the final "b.l.t (, %s10)".
24+
define fastcc <256 x double> @vfsumd_vvml(<256 x double> %0, <256 x i1> %1) {
25+
; CHECK-LABEL: vfsumd_vvml:
26+
; CHECK: # %bb.0:
27+
; CHECK-NEXT: lea %s0, 256
28+
; CHECK-NEXT: lvl %s0
29+
; CHECK-NEXT: vfsum.d %v0, %v0, %vm1
30+
; CHECK-NEXT: b.l.t (, %s10)
31+
%3 = tail call fast <256 x double> @llvm.ve.vl.vfsumd.vvml(<256 x double> %0, <256 x i1> %1, i32 256)
32+
ret <256 x double> %3
33+
}
34+
35+
; Function Attrs: nounwind readnone
36+
declare <256 x double> @llvm.ve.vl.vfsumd.vvml(<256 x double>, <256 x i1>, i32)
37+
38+
; Function Attrs: nounwind readnone
; Test for the two-operand form llvm.ve.vl.vfsums.vvl: the function passes %0
; and the constant i32 256 to the intrinsic and returns its result.
; The FileCheck lines expect 256 to be loaded into %s0 with lea, then
; "lvl %s0", a single "vfsum.s %v0, %v0", and the final "b.l.t (, %s10)".
; Note that the IR-level vector type is still <256 x double> for this ".s"
; variant, matching the v256f64 types used in the intrinsic definition.
39+
define fastcc <256 x double> @vfsums_vvl(<256 x double> %0) {
40+
; CHECK-LABEL: vfsums_vvl:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: lea %s0, 256
43+
; CHECK-NEXT: lvl %s0
44+
; CHECK-NEXT: vfsum.s %v0, %v0
45+
; CHECK-NEXT: b.l.t (, %s10)
46+
%2 = tail call fast <256 x double> @llvm.ve.vl.vfsums.vvl(<256 x double> %0, i32 256)
47+
ret <256 x double> %2
48+
}
49+
50+
; Function Attrs: nounwind readnone
51+
declare <256 x double> @llvm.ve.vl.vfsums.vvl(<256 x double>, i32)
52+
53+
; Function Attrs: nounwind readnone
; Test for the three-operand form llvm.ve.vl.vfsums.vvml: the function passes
; %0, the <256 x i1> argument %1 and the constant i32 256 to the intrinsic and
; returns its result.
; The FileCheck lines expect lea/lvl with 256, then
; "vfsum.s %v0, %v0, %vm1" (the <256 x i1> argument shows up as %vm1) and the
; final "b.l.t (, %s10)".
54+
define fastcc <256 x double> @vfsums_vvml(<256 x double> %0, <256 x i1> %1) {
55+
; CHECK-LABEL: vfsums_vvml:
56+
; CHECK: # %bb.0:
57+
; CHECK-NEXT: lea %s0, 256
58+
; CHECK-NEXT: lvl %s0
59+
; CHECK-NEXT: vfsum.s %v0, %v0, %vm1
60+
; CHECK-NEXT: b.l.t (, %s10)
61+
%3 = tail call fast <256 x double> @llvm.ve.vl.vfsums.vvml(<256 x double> %0, <256 x i1> %1, i32 256)
62+
ret <256 x double> %3
63+
}
64+
65+
; Function Attrs: nounwind readnone
66+
declare <256 x double> @llvm.ve.vl.vfsums.vvml(<256 x double>, <256 x i1>, i32)
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
2+
3+
;;; Test vector sum intrinsic instructions
4+
;;;
5+
;;; Note:
6+
;;; We test VSUM*vl and VSUM*vml instructions.
7+
8+
; Function Attrs: nounwind readnone
; Test for the two-operand form llvm.ve.vl.vsumwsx.vvl: the function passes %0
; and the constant i32 256 to the intrinsic and returns its result.
; The FileCheck lines expect 256 to be loaded into %s0 with lea, then
; "lvl %s0", a single "vsum.w.sx %v0, %v0", and the final "b.l.t (, %s10)".
; The IR-level vector type is <256 x double>, matching the v256f64 types used
; in the intrinsic definition.
9+
define fastcc <256 x double> @vsumwsx_vvl(<256 x double> %0) {
10+
; CHECK-LABEL: vsumwsx_vvl:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: lea %s0, 256
13+
; CHECK-NEXT: lvl %s0
14+
; CHECK-NEXT: vsum.w.sx %v0, %v0
15+
; CHECK-NEXT: b.l.t (, %s10)
16+
%2 = tail call fast <256 x double> @llvm.ve.vl.vsumwsx.vvl(<256 x double> %0, i32 256)
17+
ret <256 x double> %2
18+
}
19+
20+
; Function Attrs: nounwind readnone
21+
declare <256 x double> @llvm.ve.vl.vsumwsx.vvl(<256 x double>, i32)
22+
23+
; Function Attrs: nounwind readnone
; Test for the three-operand form llvm.ve.vl.vsumwsx.vvml: the function passes
; %0, the <256 x i1> argument %1 and the constant i32 256 to the intrinsic and
; returns its result.
; The FileCheck lines expect lea/lvl with 256, then
; "vsum.w.sx %v0, %v0, %vm1" (the <256 x i1> argument shows up as %vm1) and
; the final "b.l.t (, %s10)".
24+
define fastcc <256 x double> @vsumwsx_vvml(<256 x double> %0, <256 x i1> %1) {
25+
; CHECK-LABEL: vsumwsx_vvml:
26+
; CHECK: # %bb.0:
27+
; CHECK-NEXT: lea %s0, 256
28+
; CHECK-NEXT: lvl %s0
29+
; CHECK-NEXT: vsum.w.sx %v0, %v0, %vm1
30+
; CHECK-NEXT: b.l.t (, %s10)
31+
%3 = tail call fast <256 x double> @llvm.ve.vl.vsumwsx.vvml(<256 x double> %0, <256 x i1> %1, i32 256)
32+
ret <256 x double> %3
33+
}
34+
35+
; Function Attrs: nounwind readnone
36+
declare <256 x double> @llvm.ve.vl.vsumwsx.vvml(<256 x double>, <256 x i1>, i32)
37+
38+
; Function Attrs: nounwind readnone
; Test for the two-operand form llvm.ve.vl.vsumwzx.vvl: the function passes %0
; and the constant i32 256 to the intrinsic and returns its result.
; The FileCheck lines expect 256 to be loaded into %s0 with lea, then
; "lvl %s0", a single "vsum.w.zx %v0, %v0", and the final "b.l.t (, %s10)".
39+
define fastcc <256 x double> @vsumwzx_vvl(<256 x double> %0) {
40+
; CHECK-LABEL: vsumwzx_vvl:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: lea %s0, 256
43+
; CHECK-NEXT: lvl %s0
44+
; CHECK-NEXT: vsum.w.zx %v0, %v0
45+
; CHECK-NEXT: b.l.t (, %s10)
46+
%2 = tail call fast <256 x double> @llvm.ve.vl.vsumwzx.vvl(<256 x double> %0, i32 256)
47+
ret <256 x double> %2
48+
}
49+
50+
; Function Attrs: nounwind readnone
51+
declare <256 x double> @llvm.ve.vl.vsumwzx.vvl(<256 x double>, i32)
52+
53+
; Function Attrs: nounwind readnone
; Test for the three-operand form llvm.ve.vl.vsumwzx.vvml: the function passes
; %0, the <256 x i1> argument %1 and the constant i32 256 to the intrinsic and
; returns its result.
; The FileCheck lines expect lea/lvl with 256, then
; "vsum.w.zx %v0, %v0, %vm1" (the <256 x i1> argument shows up as %vm1) and
; the final "b.l.t (, %s10)".
54+
define fastcc <256 x double> @vsumwzx_vvml(<256 x double> %0, <256 x i1> %1) {
55+
; CHECK-LABEL: vsumwzx_vvml:
56+
; CHECK: # %bb.0:
57+
; CHECK-NEXT: lea %s0, 256
58+
; CHECK-NEXT: lvl %s0
59+
; CHECK-NEXT: vsum.w.zx %v0, %v0, %vm1
60+
; CHECK-NEXT: b.l.t (, %s10)
61+
%3 = tail call fast <256 x double> @llvm.ve.vl.vsumwzx.vvml(<256 x double> %0, <256 x i1> %1, i32 256)
62+
ret <256 x double> %3
63+
}
64+
65+
; Function Attrs: nounwind readnone
66+
declare <256 x double> @llvm.ve.vl.vsumwzx.vvml(<256 x double>, <256 x i1>, i32)
67+
68+
; Function Attrs: nounwind readnone
; Test for the two-operand form llvm.ve.vl.vsuml.vvl: the function passes %0
; and the constant i32 256 to the intrinsic and returns its result.
; The FileCheck lines expect 256 to be loaded into %s0 with lea, then
; "lvl %s0", a single "vsum.l %v0, %v0", and the final "b.l.t (, %s10)".
69+
define fastcc <256 x double> @vsuml_vvl(<256 x double> %0) {
70+
; CHECK-LABEL: vsuml_vvl:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: lea %s0, 256
73+
; CHECK-NEXT: lvl %s0
74+
; CHECK-NEXT: vsum.l %v0, %v0
75+
; CHECK-NEXT: b.l.t (, %s10)
76+
%2 = tail call fast <256 x double> @llvm.ve.vl.vsuml.vvl(<256 x double> %0, i32 256)
77+
ret <256 x double> %2
78+
}
79+
80+
; Function Attrs: nounwind readnone
81+
declare <256 x double> @llvm.ve.vl.vsuml.vvl(<256 x double>, i32)
82+
83+
; Function Attrs: nounwind readnone
; Test for the three-operand form llvm.ve.vl.vsuml.vvml: the function passes
; %0, the <256 x i1> argument %1 and the constant i32 256 to the intrinsic and
; returns its result.
; The FileCheck lines expect lea/lvl with 256, then
; "vsum.l %v0, %v0, %vm1" (the <256 x i1> argument shows up as %vm1) and the
; final "b.l.t (, %s10)".
84+
define fastcc <256 x double> @vsuml_vvml(<256 x double> %0, <256 x i1> %1) {
85+
; CHECK-LABEL: vsuml_vvml:
86+
; CHECK: # %bb.0:
87+
; CHECK-NEXT: lea %s0, 256
88+
; CHECK-NEXT: lvl %s0
89+
; CHECK-NEXT: vsum.l %v0, %v0, %vm1
90+
; CHECK-NEXT: b.l.t (, %s10)
91+
%3 = tail call fast <256 x double> @llvm.ve.vl.vsuml.vvml(<256 x double> %0, <256 x i1> %1, i32 256)
92+
ret <256 x double> %3
93+
}
94+
95+
; Function Attrs: nounwind readnone
96+
declare <256 x double> @llvm.ve.vl.vsuml.vvml(<256 x double>, <256 x i1>, i32)

0 commit comments

Comments
 (0)