Commit 581a803

[PowerPC] Disable CTR loop generation for fma with the PPC double double type.
It is possible to generate the llvm.fmuladd.ppcf128 intrinsic, but there is no FMA instruction that corresponds to this intrinsic for ppcf128. The intrinsic therefore has to remain a call rather than being lowered to a single instruction, and since a call may clobber the count register, CTR loop generation must be disabled for fma involving the ppcf128 type. This patch implements that behaviour.

Differential Revision: https://reviews.llvm.org/D107914
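For context, one way the llvm.fmuladd.ppcf128 intrinsic can arise is FP contraction of a multiply-add on IBM long double. The snippet below is a hypothetical reproducer, not part of this commit: the function name is made up, and it assumes a powerpc64le-unknown-linux-gnu target where long double is IBM double-double (ppc_fp128) and clang's FP contraction is enabled.

// Hypothetical reproducer (assumed target/flags, not from this commit):
// with 'long double' being IBM double-double and FP contraction enabled,
// clang may emit @llvm.fmuladd.ppcf128 for the multiply-add below. That
// intrinsic has no hardware FMA, so the loop body keeps a library call.
long double mac(const long double *a, const long double *b, long n) {
  long double acc = 0.0L;
  for (long i = 0; i < n; ++i)
    acc = a[i] * b[i] + acc; // candidate for llvm.fmuladd.ppcf128
  return acc;
}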
1 parent 571b0d8 commit 581a803

2 files changed (+116, -0 lines)

2 files changed

+116
-0
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 3 additions & 0 deletions
@@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
   case Intrinsic::experimental_constrained_sin:
   case Intrinsic::experimental_constrained_cos:
     return true;
+  // There is no corresponding FMA instruction for PPC double double.
+  // Thus, we need to disable CTR loop generation for this type.
+  case Intrinsic::fmuladd:
   case Intrinsic::copysign:
     if (CI->getArgOperand(0)->getType()->getScalarType()->
           isPPC_FP128Ty())
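Note that the new fmuladd case falls through to the ppc_fp128 check that already guards copysign, so CTR loops are only ruled out when the operands really are double-double. The following standalone sketch models that decision; the enums and helper are illustrative only and are not the LLVM API:

#include <iostream>

// Toy model of the PPCTTIImpl::mightUseCTR decision touched by this patch:
// intrinsics that may end up as real calls must block CTR-loop formation,
// because a call can clobber the count register.
enum class IntrinsicID { ConstrainedSin, FMulAdd, CopySign, Other };
enum class ScalarTy { PPCFP128, F64 };

bool mightUseCTR(IntrinsicID ID, ScalarTy Ty) {
  switch (ID) {
  case IntrinsicID::ConstrainedSin:
    return true; // always lowered to a call
  // No FMA instruction exists for PPC double double, so fmuladd stays a
  // call for ppc_fp128; it shares copysign's type check.
  case IntrinsicID::FMulAdd:
  case IntrinsicID::CopySign:
    return Ty == ScalarTy::PPCFP128;
  default:
    return false;
  }
}

int main() {
  std::cout << mightUseCTR(IntrinsicID::FMulAdd, ScalarTy::PPCFP128) << '\n'; // 1: blocks CTR loop
  std::cout << mightUseCTR(IntrinsicID::FMulAdd, ScalarTy::F64) << '\n';      // 0: CTR loop still allowed
}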
Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2
+
+define ppc_fp128 @test_ctr0() {
+; LE-LABEL: test_ctr0:
+; LE: # %bb.0: # %bb
+; LE-NEXT: mflr r0
+; LE-NEXT: .cfi_def_cfa_offset 48
+; LE-NEXT: .cfi_offset lr, 16
+; LE-NEXT: .cfi_offset r30, -16
+; LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; LE-NEXT: std r0, 16(r1)
+; LE-NEXT: stdu r1, -48(r1)
+; LE-NEXT: xxlxor f1, f1, f1
+; LE-NEXT: li r30, 0
+; LE-NEXT: xxlxor f2, f2, f2
+; LE-NEXT: .p2align 5
+; LE-NEXT: .LBB0_1: # %bb6
+; LE-NEXT: #
+; LE-NEXT: xxlxor f3, f3, f3
+; LE-NEXT: xxlxor f4, f4, f4
+; LE-NEXT: bl __gcc_qadd
+; LE-NEXT: nop
+; LE-NEXT: addi r30, r30, 4
+; LE-NEXT: cmpldi r30, 0
+; LE-NEXT: bne cr0, .LBB0_1
+; LE-NEXT: # %bb.2: # %bb14
+; LE-NEXT: addi r1, r1, 48
+; LE-NEXT: ld r0, 16(r1)
+; LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; LE-NEXT: mtlr r0
+; LE-NEXT: blr
+;
+; P9BE-LABEL: test_ctr0:
+; P9BE: # %bb.0: # %bb
+; P9BE-NEXT: mflr r0
+; P9BE-NEXT: std r0, 16(r1)
+; P9BE-NEXT: stdu r1, -128(r1)
+; P9BE-NEXT: .cfi_def_cfa_offset 128
+; P9BE-NEXT: .cfi_offset lr, 16
+; P9BE-NEXT: .cfi_offset r30, -16
+; P9BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; P9BE-NEXT: xxlxor f1, f1, f1
+; P9BE-NEXT: li r30, 0
+; P9BE-NEXT: xxlxor f2, f2, f2
+; P9BE-NEXT: .p2align 5
+; P9BE-NEXT: .LBB0_1: # %bb6
+; P9BE-NEXT: #
+; P9BE-NEXT: xxlxor f3, f3, f3
+; P9BE-NEXT: xxlxor f4, f4, f4
+; P9BE-NEXT: bl __gcc_qadd
+; P9BE-NEXT: nop
+; P9BE-NEXT: addi r30, r30, 4
+; P9BE-NEXT: cmpldi r30, 0
+; P9BE-NEXT: bne cr0, .LBB0_1
+; P9BE-NEXT: # %bb.2: # %bb14
+; P9BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; P9BE-NEXT: addi r1, r1, 128
+; P9BE-NEXT: ld r0, 16(r1)
+; P9BE-NEXT: mtlr r0
+; P9BE-NEXT: blr
+;
+; P8BE-LABEL: test_ctr0:
+; P8BE: # %bb.0: # %bb
+; P8BE-NEXT: mflr r0
+; P8BE-NEXT: std r0, 16(r1)
+; P8BE-NEXT: stdu r1, -128(r1)
+; P8BE-NEXT: .cfi_def_cfa_offset 128
+; P8BE-NEXT: .cfi_offset lr, 16
+; P8BE-NEXT: .cfi_offset r30, -16
+; P8BE-NEXT: xxlxor f1, f1, f1
+; P8BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; P8BE-NEXT: li r30, 0
+; P8BE-NEXT: xxlxor f2, f2, f2
+; P8BE-NEXT: .p2align 5
+; P8BE-NEXT: .LBB0_1: # %bb6
+; P8BE-NEXT: #
+; P8BE-NEXT: xxlxor f3, f3, f3
+; P8BE-NEXT: xxlxor f4, f4, f4
+; P8BE-NEXT: bl __gcc_qadd
+; P8BE-NEXT: nop
+; P8BE-NEXT: addi r30, r30, 4
+; P8BE-NEXT: cmpldi r30, 0
+; P8BE-NEXT: bne cr0, .LBB0_1
+; P8BE-NEXT: # %bb.2: # %bb14
+; P8BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; P8BE-NEXT: addi r1, r1, 128
+; P8BE-NEXT: ld r0, 16(r1)
+; P8BE-NEXT: mtlr r0
+; P8BE-NEXT: blr
+bb:
+  br label %bb6
+
+bb6: ; preds = %bb6, %bb
+  %i = phi ppc_fp128 [ %i8, %bb6 ], [ 0xM00000000000000000000000000000000, %bb ]
+  %i7 = phi i64 [ %i9, %bb6 ], [ 0, %bb ]
+  %i8 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i) #4
+  %i9 = add i64 %i7, -4
+  %i10 = icmp eq i64 %i9, 0
+  br i1 %i10, label %bb14, label %bb6
+
+bb14: ; preds = %bb6
+  ret ppc_fp128 %i8
+}
