Skip to content

Commit e3475f5

Browse files
author
Esme-Yi
committed
[PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp).
Summary: This patch implements the builtins for xvtdivdp, xvtdivsp, xvtsqrtdp, xvtsqrtsp. The instructions correspond to the following builtins: int vec_test_swdiv(vector double v1, vector double v2); int vec_test_swdivs(vector float v1, vector float v2); int vec_test_swsqrt(vector double v1); int vec_test_swsqrts(vector float v1); This patch depends on D88274, which fixes the bug in copying from CRRC to GPRC/G8RC. Reviewed By: steven.zhang, amyk Differential Revision: https://reviews.llvm.org/D88278
1 parent 2ccbf3d commit e3475f5

File tree

6 files changed

+121
-0
lines changed

6 files changed

+121
-0
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,11 @@ BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
558558

559559
BUILTIN(__builtin_vsx_xvtlsbb, "iV16UcUi", "")
560560

561+
BUILTIN(__builtin_vsx_xvtdivdp, "iV2dV2d", "")
562+
BUILTIN(__builtin_vsx_xvtdivsp, "iV4fV4f", "")
563+
BUILTIN(__builtin_vsx_xvtsqrtdp, "iV2d", "")
564+
BUILTIN(__builtin_vsx_xvtsqrtsp, "iV4f", "")
565+
561566
// P10 Vector Permute Extended built-in.
562567
BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
563568

clang/lib/Headers/altivec.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3504,6 +3504,20 @@ vec_div(vector signed __int128 __a, vector signed __int128 __b) {
35043504
}
35053505
#endif __POWER10_VECTOR__
35063506

3507+
/* vec_test_swdiv, vec_test_swdivs */
3508+
3509+
#ifdef __VSX__
3510+
static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a,
3511+
vector double __b) {
3512+
return __builtin_vsx_xvtdivdp(__a, __b);
3513+
}
3514+
3515+
static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a,
3516+
vector float __b) {
3517+
return __builtin_vsx_xvtdivsp(__a, __b);
3518+
}
3519+
#endif
3520+
35073521
/* vec_dss */
35083522

35093523
#define vec_dss __builtin_altivec_dss
@@ -8057,6 +8071,18 @@ vec_vrsqrtefp(vector float __a) {
80578071
return __builtin_altivec_vrsqrtefp(__a);
80588072
}
80598073

8074+
/* vec_test_swsqrt, vec_test_swsqrts */
8075+
8076+
#ifdef __VSX__
8077+
static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) {
8078+
return __builtin_vsx_xvtsqrtdp(__a);
8079+
}
8080+
8081+
static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) {
8082+
return __builtin_vsx_xvtsqrtsp(__a);
8083+
}
8084+
#endif
8085+
80608086
/* vec_sel */
80618087

80628088
#define __builtin_altivec_vsel_4si vec_sel

clang/test/CodeGen/builtins-ppc-vsx.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ vector unsigned long long res_vull;
5252
vector signed __int128 res_vslll;
5353

5454
double res_d;
55+
int res_i;
5556
float res_af[4];
5657
double res_ad[2];
5758
signed char res_asc[16];
@@ -878,6 +879,23 @@ void test1() {
878879
// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
879880
// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
880881

882+
res_i = vec_test_swsqrt(vd);
883+
// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
884+
// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
885+
886+
res_i = vec_test_swsqrts(vf);
887+
// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
888+
// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
889+
890+
res_i = vec_test_swdiv(vd, vd);
891+
// CHECK: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
892+
// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
893+
894+
res_i = vec_test_swdivs(vf, vf);
895+
// CHECK: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
896+
// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
897+
898+
881899
dummy();
882900
// CHECK: call void @dummy()
883901
// CHECK-LE: call void @dummy()

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,16 @@ def int_ppc_vsx_xxinsertw :
12491249
def int_ppc_vsx_xvtlsbb :
12501250
PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty],
12511251
[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
1252+
def int_ppc_vsx_xvtdivdp :
1253+
PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty],
1254+
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
1255+
def int_ppc_vsx_xvtdivsp :
1256+
PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty],
1257+
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
1258+
def int_ppc_vsx_xvtsqrtdp :
1259+
PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
1260+
def int_ppc_vsx_xvtsqrtsp :
1261+
PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
12521262
def int_ppc_vsx_xxeval :
12531263
PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
12541264
[llvm_v2i64_ty, llvm_v2i64_ty,

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
25912591
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
25922592
(XVDIVDP $A, $B)>;
25932593

2594+
// Vector test for software divide and sqrt.
2595+
def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
2596+
(COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
2597+
def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
2598+
(COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
2599+
def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
2600+
(COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
2601+
def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
2602+
(COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
2603+
25942604
// Reciprocal estimate
25952605
def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
25962606
(XVRESP $A)>;

llvm/test/CodeGen/PowerPC/vsx_builtins.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,55 @@ define void @test4(<2 x double> %a, i8* %b) {
5454
}
5555
; Function Attrs: nounwind readnone
5656
declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*)
57+
58+
define i32 @test_vec_test_swdiv(<2 x double> %a, <2 x double> %b) {
59+
; CHECK-LABEL: test_vec_test_swdiv:
60+
; CHECK: # %bb.0: # %entry
61+
; CHECK-NEXT: xvtdivdp cr0, v2, v3
62+
; CHECK-NEXT: mfocrf r3, 128
63+
; CHECK-NEXT: srwi r3, r3, 28
64+
; CHECK-NEXT: blr
65+
entry:
66+
%0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)
67+
ret i32 %0
68+
}
69+
declare i32 @llvm.ppc.vsx.xvtdivdp(<2 x double>, <2 x double>)
70+
71+
define i32 @test_vec_test_swdivs(<4 x float> %a, <4 x float> %b) {
72+
; CHECK-LABEL: test_vec_test_swdivs:
73+
; CHECK: # %bb.0: # %entry
74+
; CHECK-NEXT: xvtdivsp cr0, v2, v3
75+
; CHECK-NEXT: mfocrf r3, 128
76+
; CHECK-NEXT: srwi r3, r3, 28
77+
; CHECK-NEXT: blr
78+
entry:
79+
%0 = tail call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %a, <4 x float> %b)
80+
ret i32 %0
81+
}
82+
declare i32 @llvm.ppc.vsx.xvtdivsp(<4 x float>, <4 x float>)
83+
84+
define i32 @test_vec_test_swsqrt(<2 x double> %a) {
85+
; CHECK-LABEL: test_vec_test_swsqrt:
86+
; CHECK: # %bb.0: # %entry
87+
; CHECK-NEXT: xvtsqrtdp cr0, v2
88+
; CHECK-NEXT: mfocrf r3, 128
89+
; CHECK-NEXT: srwi r3, r3, 28
90+
; CHECK-NEXT: blr
91+
entry:
92+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %a)
93+
ret i32 %0
94+
}
95+
declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
96+
97+
define i32 @test_vec_test_swsqrts(<4 x float> %a) {
98+
; CHECK-LABEL: test_vec_test_swsqrts:
99+
; CHECK: # %bb.0: # %entry
100+
; CHECK-NEXT: xvtsqrtsp cr0, v2
101+
; CHECK-NEXT: mfocrf r3, 128
102+
; CHECK-NEXT: srwi r3, r3, 28
103+
; CHECK-NEXT: blr
104+
entry:
105+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %a)
106+
ret i32 %0
107+
}
108+
declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>)

0 commit comments

Comments
 (0)