Skip to content

Commit 2c7688c

Browse files
committed
Implement mffsl in pre-Power9 targets
1 parent 2d62858 commit 2c7688c

File tree

7 files changed

+104
-40
lines changed

7 files changed

+104
-40
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector")
152152
BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "")
153153
BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "")
154154
BUILTIN(__builtin_ppc_mffs, "d", "")
155-
TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions")
155+
TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "")
156156
BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "")
157157
BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "")
158158
BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "")

clang/lib/Headers/ppc_wrappers/smmintrin.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,7 @@ extern __inline __m128d
8383

8484
switch (__rounding) {
8585
case _MM_FROUND_TO_NEAREST_INT:
86-
#ifdef _ARCH_PWR9
8786
__fpscr_save.__fr = __builtin_ppc_mffsl();
88-
#else
89-
__fpscr_save.__fr = __builtin_ppc_mffs();
90-
__fpscr_save.__fpscr &= 0x70007f0ffL;
91-
#endif
9287
__attribute__((fallthrough));
9388
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
9489
__builtin_ppc_set_fpscr_rn(0b00);
@@ -133,12 +128,7 @@ extern __inline __m128d
133128
*/
134129
__asm__("" : : "wa"(__r));
135130
/* Restore enabled exceptions. */
136-
#ifdef _ARCH_PWR9
137131
__fpscr_save.__fr = __builtin_ppc_mffsl();
138-
#else
139-
__fpscr_save.__fr = __builtin_ppc_mffs();
140-
__fpscr_save.__fpscr &= 0x70007f0ffL;
141-
#endif
142132
__fpscr_save.__fpscr |= __enables_save.__fpscr;
143133
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
144134
}
@@ -184,12 +174,7 @@ extern __inline __m128
184174

185175
switch (__rounding) {
186176
case _MM_FROUND_TO_NEAREST_INT:
187-
#ifdef _ARCH_PWR9
188177
__fpscr_save.__fr = __builtin_ppc_mffsl();
189-
#else
190-
__fpscr_save.__fr = __builtin_ppc_mffs();
191-
__fpscr_save.__fpscr &= 0x70007f0ffL;
192-
#endif
193178
__attribute__((fallthrough));
194179
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
195180
__builtin_ppc_set_fpscr_rn(0b00);
@@ -234,12 +219,7 @@ extern __inline __m128
234219
*/
235220
__asm__("" : : "wa"(__r));
236221
/* Restore enabled exceptions. */
237-
#ifdef _ARCH_PWR9
238222
__fpscr_save.__fr = __builtin_ppc_mffsl();
239-
#else
240-
__fpscr_save.__fr = __builtin_ppc_mffs();
241-
__fpscr_save.__fpscr &= 0x70007f0ffL;
242-
#endif
243223
__fpscr_save.__fpscr |= __enables_save.__fpscr;
244224
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
245225
}

clang/test/CodeGen/PowerPC/builtins-ppc.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// REQUIRES: powerpc-registered-target
22
// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
33
// RUN: | FileCheck %s
4-
// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
5-
// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK
64

75
void test_eh_return_data_regno()
86
{
@@ -45,10 +43,8 @@ void test_builtin_ppc_flm() {
4543
// CHECK: call double @llvm.ppc.setflm(double %2)
4644
res = __builtin_setflm(res);
4745

48-
#ifdef _ARCH_PWR9
49-
// P9: call double @llvm.ppc.mffsl()
46+
// CHECK: call double @llvm.ppc.mffsl()
5047
res = __builtin_ppc_mffsl();
51-
#endif
5248
}
5349

5450
double test_builtin_unpack_ldbl(long double x) {

clang/test/CodeGen/PowerPC/ppc-smmintrin.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,7 @@ test_round() {
247247
// CHECK: call double @llvm.ppc.readflm()
248248
// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
249249
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
250-
// CHECK: call double @llvm.ppc.readflm()
251-
// P10: call double @llvm.ppc.mffsl()
250+
// CHECK: call double @llvm.ppc.mffsl()
252251
// CHECK: call double @llvm.ppc.setrnd(i32 0)
253252
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
254253
// CHECK: call <4 x float> @vec_rint(float vector[4])
@@ -259,8 +258,7 @@ test_round() {
259258
// CHECK: call <4 x float> @vec_trunc(float vector[4])
260259
// CHECK: call <4 x float> @vec_rint(float vector[4])
261260
// CHECK: call void asm sideeffect "", "^wa"
262-
// CHECK: call double @llvm.ppc.readflm()
263-
// P10: call double @llvm.ppc.mffsl()
261+
// CHECK: call double @llvm.ppc.mffsl()
264262
// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
265263

266264
// CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
@@ -271,8 +269,7 @@ test_round() {
271269
// CHECK: call double @llvm.ppc.readflm()
272270
// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
273271
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
274-
// CHECK: call double @llvm.ppc.readflm()
275-
// P10: call double @llvm.ppc.mffsl()
272+
// CHECK: call double @llvm.ppc.mffsl()
276273
// CHECK: call double @llvm.ppc.setrnd(i32 0)
277274
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
278275
// CHECK: call <2 x double> @vec_rint(double vector[2])
@@ -283,8 +280,7 @@ test_round() {
283280
// CHECK: call <2 x double> @vec_trunc(double vector[2])
284281
// CHECK: call <2 x double> @vec_rint(double vector[2])
285282
// CHECK: call void asm sideeffect "", "^wa"
286-
// CHECK: call double @llvm.ppc.readflm()
287-
// P10: call double @llvm.ppc.mffsl()
283+
// CHECK: call double @llvm.ppc.mffsl()
288284
// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
289285

290286
// CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
646646
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
647647
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
648648

649-
// To handle counter-based loop conditions.
650649
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
650+
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
651651

652652
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
653653
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
@@ -11595,6 +11595,50 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
1159511595
llvm_unreachable("ERROR:Should return for all cases within swtich.");
1159611596
}
1159711597

11598+
// Lower mffsl intrinsic with mffs in targets without ISA 3.0
11599+
static SDValue lowerMFFSL(SDValue Op, SelectionDAG &DAG,
11600+
const PPCSubtarget &Subtarget) {
11601+
assert(cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() ==
11602+
Intrinsic::ppc_mffsl &&
11603+
"Should only be called on int_ppc_mffsl");
11604+
if (Subtarget.isISA3_0())
11605+
return Op;
11606+
11607+
SDLoc dl(Op);
11608+
SDValue Chain = Op.getOperand(0);
11609+
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
11610+
Chain = MFFS.getValue(1);
11611+
11612+
if (Subtarget.isPPC64()) {
11613+
SDValue Int = DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS);
11614+
// Mask 29-31, 45-51 and 56-63 bits
11615+
SDValue Masked = DAG.getNode(ISD::AND, dl, MVT::i64, Int,
11616+
DAG.getConstant(0x70007f0ffULL, dl, MVT::i64));
11617+
SDValue Cast = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Masked);
11618+
return DAG.getMergeValues({Cast, Chain}, dl);
11619+
}
11620+
11621+
MachineFunction &MF = DAG.getMachineFunction();
11622+
MachinePointerInfo PtrInfo;
11623+
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
11624+
SDValue Base = DAG.getFrameIndex(SSFI, MVT::i32);
11625+
Chain = DAG.getStore(Chain, dl, MFFS, Base, PtrInfo);
11626+
11627+
assert(!Subtarget.isLittleEndian() && "32-bit little endian is unsupported!");
11628+
SDValue Offset4 = DAG.getNode(ISD::ADD, dl, MVT::i32, Base,
11629+
DAG.getConstant(4, dl, MVT::i32));
11630+
SDValue Hi = DAG.getLoad(MVT::i32, dl, Chain, Base, PtrInfo);
11631+
SDValue Lo = DAG.getLoad(MVT::i32, dl, Hi.getValue(1), Offset4, PtrInfo);
11632+
Chain = Lo.getValue(1);
11633+
Hi =
11634+
DAG.getNode(ISD::AND, dl, MVT::i32, Hi, DAG.getConstant(7, dl, MVT::i32));
11635+
Lo = DAG.getNode(ISD::AND, dl, MVT::i32, Lo,
11636+
DAG.getConstant(0x7f0ffULL, dl, MVT::i32));
11637+
Chain = DAG.getStore(Chain, dl, Hi, Base, PtrInfo);
11638+
Chain = DAG.getStore(Chain, dl, Lo, Offset4, PtrInfo);
11639+
return DAG.getLoad(MVT::f64, dl, Chain, Base, PtrInfo);
11640+
}
11641+
1159811642
/// LowerOperation - Provide custom lowering hooks for some operations.
1159911643
///
1160011644
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11669,8 +11713,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1166911713
return LowerFP_ROUND(Op, DAG);
1167011714
case ISD::ROTL: return LowerROTL(Op, DAG);
1167111715

11672-
// For counter-based loop handling.
11673-
case ISD::INTRINSIC_W_CHAIN: return SDValue();
11716+
case ISD::INTRINSIC_W_CHAIN: {
11717+
if (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() ==
11718+
Intrinsic::ppc_mffsl)
11719+
return lowerMFFSL(Op, DAG, Subtarget);
11720+
return SDValue();
11721+
}
1167411722

1167511723
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
1167611724

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3188,7 +3188,6 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
31883188
(TCRETURNri CTRRC:$dst, imm:$imm)>;
31893189

31903190
def : Pat<(int_ppc_readflm), (MFFS)>;
3191-
def : Pat<(int_ppc_mffsl), (MFFSL)>;
31923191

31933192
// Hi and Lo for Darwin Global Addresses.
31943193
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
@@ -4510,6 +4509,9 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst),
45104509
def : Pat<(int_ppc_dcbflp xoaddr:$dst),
45114510
(DCBF 3, xoaddr:$dst)>;
45124511

4512+
let Predicates = [IsISA3_0] in
4513+
def : Pat<(int_ppc_mffsl), (MFFSL)>;
4514+
45134515
let Predicates = [IsISA3_1] in {
45144516
def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>;
45154517
def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>;

llvm/test/CodeGen/PowerPC/read-set-flm.ll

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
21
; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s
2+
; RUN: llc < %s -mtriple powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \
3+
; RUN: --check-prefix=P9
4+
; RUN: llc < %s -mtriple powerpc64-ibm-aix | FileCheck %s --check-prefix=BE
5+
; RUN: llc < %s -mtriple powerpc-ibm-aix | FileCheck %s --check-prefix=BE32
36
; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
47
; RUN: 2>&1 | FileCheck %s --check-prefix=LOG
58
; REQUIRES: asserts
@@ -151,8 +154,47 @@ entry:
151154
define double @mffsl() {
152155
; CHECK-LABEL: mffsl:
153156
; CHECK: # %bb.0: # %entry
154-
; CHECK-NEXT: mffsl 1
157+
; CHECK-NEXT: mffs 0
158+
; CHECK-NEXT: lis 4, -8192
159+
; CHECK-NEXT: mffprd 3, 0
160+
; CHECK-NEXT: ori 4, 4, 65055
161+
; CHECK-NEXT: rldicl 4, 4, 3, 29
162+
; CHECK-NEXT: and 3, 3, 4
163+
; CHECK-NEXT: mtfprd 1, 3
155164
; CHECK-NEXT: blr
165+
;
166+
; P9-LABEL: mffsl:
167+
; P9: # %bb.0: # %entry
168+
; P9-NEXT: mffsl 1
169+
; P9-NEXT: blr
170+
;
171+
; BE-LABEL: mffsl:
172+
; BE: # %bb.0: # %entry
173+
; BE-NEXT: mffs 0
174+
; BE-NEXT: stfd 0, -16(1)
175+
; BE-NEXT: ld 3, -16(1)
176+
; BE-NEXT: lis 4, -8192
177+
; BE-NEXT: ori 4, 4, 65055
178+
; BE-NEXT: rldicl 4, 4, 3, 29
179+
; BE-NEXT: and 3, 3, 4
180+
; BE-NEXT: std 3, -8(1)
181+
; BE-NEXT: lfd 1, -8(1)
182+
; BE-NEXT: blr
183+
;
184+
; BE32-LABEL: mffsl:
185+
; BE32: # %bb.0: # %entry
186+
; BE32-NEXT: mffs 0
187+
; BE32-NEXT: stfd 0, -8(1)
188+
; BE32-NEXT: lis 4, 7
189+
; BE32-NEXT: lwz 3, -4(1)
190+
; BE32-NEXT: ori 4, 4, 61695
191+
; BE32-NEXT: lwz 5, -8(1)
192+
; BE32-NEXT: and 3, 3, 4
193+
; BE32-NEXT: stw 3, -4(1)
194+
; BE32-NEXT: clrlwi 3, 5, 29
195+
; BE32-NEXT: stw 3, -8(1)
196+
; BE32-NEXT: lfd 1, -8(1)
197+
; BE32-NEXT: blr
156198
entry:
157199
%x = call double @llvm.ppc.mffsl()
158200
ret double %x

0 commit comments

Comments
 (0)