Skip to content

Commit 3136cbe

Browse files
committed
[PowerPC] Implement Vector Shift Builtins
This patch implements the builtins for the vector shifts (shl, srl, sra), and adds the appropriate test cases for these builtins. The builtins utilize the vector shift instructions introduced within ISA 3.1. Differential Revision: https://reviews.llvm.org/D83338
1 parent a31c89c commit 3136cbe

File tree

5 files changed

+180
-0
lines changed

5 files changed

+180
-0
lines changed

clang/lib/Headers/altivec.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17321,6 +17321,53 @@ vec_test_lsbb_all_zeros(vector unsigned char __a) {
1732117321
return __builtin_vsx_xvtlsbb(__a, 0);
1732217322
}
1732317323
#endif /* __VSX__ */
17324+
17325+
/* vs[l | r | ra]q: vec_sl, vec_sr and vec_sra on 128-bit (quadword) elements */
17326+
17327+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
17328+
vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
17329+
return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
17330+
__CHAR_BIT__));
17331+
}
17332+
17333+
static __inline__ vector signed __int128 __ATTRS_o_ai
17334+
vec_sl(vector signed __int128 __a, vector unsigned __int128 __b) {
17335+
return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
17336+
__CHAR_BIT__));
17337+
}
17338+
17339+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
17340+
vec_sr(vector unsigned __int128 __a, vector unsigned __int128 __b) {
17341+
return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
17342+
__CHAR_BIT__));
17343+
}
17344+
17345+
static __inline__ vector signed __int128 __ATTRS_o_ai
17346+
vec_sr(vector signed __int128 __a, vector unsigned __int128 __b) {
17347+
return (
17348+
vector signed __int128)(((vector unsigned __int128)__a) >>
17349+
(__b %
17350+
(vector unsigned __int128)(sizeof(
17351+
unsigned __int128) *
17352+
__CHAR_BIT__)));
17353+
}
17354+
17355+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
17356+
vec_sra(vector unsigned __int128 __a, vector unsigned __int128 __b) {
17357+
return (
17358+
vector unsigned __int128)(((vector signed __int128)__a) >>
17359+
(__b %
17360+
(vector unsigned __int128)(sizeof(
17361+
unsigned __int128) *
17362+
__CHAR_BIT__)));
17363+
}
17364+
17365+
static __inline__ vector signed __int128 __ATTRS_o_ai
17366+
vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) {
17367+
return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
17368+
__CHAR_BIT__));
17369+
}
17370+
1732417371
#endif /* __POWER10_VECTOR__ */
1732517372

1732617373
#undef __ATTRS_o_ai

clang/test/CodeGen/builtins-ppc-p10vector.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <altivec.h>
1010

11+
// Signed 128-bit vector operand passed to the signed quadword shift tests.
vector signed __int128 vi128a;
1112
vector signed char vsca, vscb;
1213
vector unsigned char vuca, vucb, vucc;
1314
vector signed short vssa, vssb;
@@ -778,6 +779,49 @@ void test_vec_xst_trunc_ull(vector unsigned __int128 __a, signed long long __b,
778779
vec_xst_trunc(__a, __b, __c);
779780
}
780781

782+
// vec_sl on unsigned 128-bit vectors: the emitted IR is expected to contain a
// left shift on <1 x i128> followed by a <1 x i128> return.
vector unsigned __int128 test_vec_slq_unsigned (void) {
783+
// CHECK-LABEL: test_vec_slq_unsigned
784+
// CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
785+
// CHECK: ret <1 x i128> %{{.+}}
786+
return vec_sl(vui128a, vui128b);
787+
}
788+
789+
// vec_sl with a signed 128-bit first operand and an unsigned shift count: the
// emitted IR is expected to contain a left shift on <1 x i128>.
vector signed __int128 test_vec_slq_signed (void) {
790+
// CHECK-LABEL: test_vec_slq_signed
791+
// CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
792+
// CHECK: ret <1 x i128>
793+
return vec_sl(vi128a, vui128a);
794+
}
795+
796+
// vec_sr on unsigned 128-bit vectors: the emitted IR is expected to contain a
// logical right shift (lshr) on <1 x i128>.
vector unsigned __int128 test_vec_srq_unsigned (void) {
797+
// CHECK-LABEL: test_vec_srq_unsigned
798+
// CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
799+
// CHECK: ret <1 x i128>
800+
return vec_sr(vui128a, vui128b);
801+
}
802+
803+
// vec_sr with a signed 128-bit first operand: the emitted IR is still expected
// to contain a logical right shift (lshr), not an arithmetic one.
vector signed __int128 test_vec_srq_signed (void) {
804+
// CHECK-LABEL: test_vec_srq_signed
805+
// CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
806+
// CHECK: ret <1 x i128>
807+
return vec_sr(vi128a, vui128a);
808+
}
809+
810+
// vec_sra on unsigned 128-bit vectors: the emitted IR is expected to contain
// an arithmetic right shift (ashr) on <1 x i128> even for unsigned operands.
vector unsigned __int128 test_vec_sraq_unsigned (void) {
811+
// CHECK-LABEL: test_vec_sraq_unsigned
812+
// CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
813+
// CHECK: ret <1 x i128>
814+
return vec_sra(vui128a, vui128b);
815+
}
816+
817+
// vec_sra with a signed 128-bit first operand and an unsigned shift count: the
// emitted IR is expected to contain an arithmetic right shift (ashr).
vector signed __int128 test_vec_sraq_signed (void) {
818+
// CHECK-LABEL: test_vec_sraq_signed
819+
// CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
820+
// CHECK: ret <1 x i128>
821+
return vec_sra(vi128a, vui128a);
822+
}
823+
824+
781825
int test_vec_test_lsbb_all_ones(void) {
782826
// CHECK: @llvm.ppc.vsx.xvtlsbb(<16 x i8> %{{.+}}, i32 1
783827
// CHECK-NEXT: ret i32

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
11281128
if (Subtarget.has64BitSupport())
11291129
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
11301130

1131+
// When the subtarget reports ISA 3.1, mark arithmetic shift right on v1i128
// as Legal. NOTE(review): presumably this lets the sra -> VSRAQ pattern added
// to PPCInstrPrefix.td in this commit match directly; confirm against the
// default action for this node/type.
if (Subtarget.isISA3_1())
1132+
setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1133+
11311134
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
11321135

11331136
if (!isPPC64) {

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,18 @@ let Predicates = [IsISA3_1] in {
12881288
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
12891289
def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
12901290
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
1291+
// Selection patterns for 128-bit (v1i128) vector shifts. Each shift kind is
// matched in two forms, the generic node (shl / srl / sra) and the
// PPC-specific node (PPCshl / PPCsrl / PPCsra), and both forms select the same
// quadword instruction: VSLQ, VSRQ or VSRAQ respectively.
// NOTE(review): the hunk header suggests these sit inside a
// `let Predicates = [IsISA3_1]` block; confirm in the full file.
def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
1292+
(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
1293+
def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
1294+
(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
1295+
def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
1296+
(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
1297+
def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
1298+
(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
1299+
def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
1300+
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
1301+
def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
1302+
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
12911303
}
12921304

12931305
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4+
; RUN: FileCheck %s
5+
6+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
7+
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
8+
; RUN: FileCheck %s
9+
10+
; These test cases demonstrate that the vector shift quadword instructions
11+
; introduced within Power10 are correctly exploited.
12+
13+
; Left shift of <1 x i128> where the shift amount is explicitly reduced modulo
; 128 with urem. The expected assembly is a single vslq followed by blr, with
; no separate instructions for the remainder.
define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
14+
; CHECK-LABEL: test_vec_vslq:
15+
; CHECK: # %bb.0: # %entry
16+
; CHECK-NEXT: vslq v2, v2, v3
17+
; CHECK-NEXT: blr
18+
entry:
19+
%rem = urem <1 x i128> %b, <i128 128>
20+
%shl = shl <1 x i128> %a, %rem
21+
ret <1 x i128> %shl
22+
}
23+
24+
; Logical right shift of <1 x i128> where the shift amount is explicitly
; reduced modulo 128 with urem. The expected assembly is a single vsrq followed
; by blr, with no separate instructions for the remainder.
define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
25+
; CHECK-LABEL: test_vec_vsrq:
26+
; CHECK: # %bb.0: # %entry
27+
; CHECK-NEXT: vsrq v2, v2, v3
28+
; CHECK-NEXT: blr
29+
entry:
30+
%rem = urem <1 x i128> %b, <i128 128>
31+
%shr = lshr <1 x i128> %a, %rem
32+
ret <1 x i128> %shr
33+
}
34+
35+
; Arithmetic right shift of <1 x i128> where the shift amount is explicitly
; reduced modulo 128 with urem. The expected assembly is a single vsraq
; followed by blr, with no separate instructions for the remainder.
define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
36+
; CHECK-LABEL: test_vec_vsraq:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: vsraq v2, v2, v3
39+
; CHECK-NEXT: blr
40+
entry:
41+
%rem = urem <1 x i128> %b, <i128 128>
42+
%shr = ashr <1 x i128> %a, %rem
43+
ret <1 x i128> %shr
44+
}
45+
46+
; Left shift of <1 x i128> by an unreduced shift amount (no urem). The expected
; assembly is a single vslq followed by blr.
define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
47+
; CHECK-LABEL: test_vec_vslq2:
48+
; CHECK: # %bb.0: # %entry
49+
; CHECK-NEXT: vslq v2, v2, v3
50+
; CHECK-NEXT: blr
51+
entry:
52+
%shl = shl <1 x i128> %a, %b
53+
ret <1 x i128> %shl
54+
}
55+
56+
; Logical right shift of <1 x i128> by an unreduced shift amount (no urem). The
; expected assembly is a single vsrq followed by blr.
define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
57+
; CHECK-LABEL: test_vec_vsrq2:
58+
; CHECK: # %bb.0: # %entry
59+
; CHECK-NEXT: vsrq v2, v2, v3
60+
; CHECK-NEXT: blr
61+
entry:
62+
%shr = lshr <1 x i128> %a, %b
63+
ret <1 x i128> %shr
64+
}
65+
66+
; Arithmetic right shift of <1 x i128> by an unreduced shift amount (no urem).
; The expected assembly is a single vsraq followed by blr.
define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
67+
; CHECK-LABEL: test_vec_vsraq2:
68+
; CHECK: # %bb.0: # %entry
69+
; CHECK-NEXT: vsraq v2, v2, v3
70+
; CHECK-NEXT: blr
71+
entry:
72+
%shr = ashr <1 x i128> %a, %b
73+
ret <1 x i128> %shr
74+
}

0 commit comments

Comments
 (0)