Skip to content

Commit 649020c

Browse files
authored
[PowerPC] Change default for auto gen stxvp for cpu=future (#142826)
For cpu=future, we want to auto generate stxvp instructions by default.
1 parent a85525f commit 649020c

File tree

4 files changed

+14
-10
lines changed

4 files changed

+14
-10
lines changed

llvm/lib/Target/PowerPC/PPC.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -482,7 +482,8 @@ def ProcessorFeatures {
482482
// Future
483483
// For future CPU we assume that all of the existing features from Power11
484484
// still exist with the exception of those we know are Power11 specific.
485-
list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture];
485+
list<SubtargetFeature> FutureAdditionalFeatures = [DirectivePwrFuture,
486+
FeatureISAFuture];
486487
list<SubtargetFeature> FutureSpecificFeatures = [];
487488
list<SubtargetFeature> FutureInheritableFeatures =
488489
!listconcat(P11InheritableFeatures, FutureAdditionalFeatures);

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1476,7 +1476,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
14761476

14771477
setMinFunctionAlignment(Align(4));
14781478

1479-
switch (Subtarget.getCPUDirective()) {
1479+
auto CPUDirective = Subtarget.getCPUDirective();
1480+
switch (CPUDirective) {
14801481
default: break;
14811482
case PPC::DIR_970:
14821483
case PPC::DIR_A2:
@@ -1508,15 +1509,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
15081509

15091510
// The Freescale cores do better with aggressive inlining of memcpy and
15101511
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1511-
if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1512-
Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1512+
if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
15131513
MaxStoresPerMemset = 32;
15141514
MaxStoresPerMemsetOptSize = 16;
15151515
MaxStoresPerMemcpy = 32;
15161516
MaxStoresPerMemcpyOptSize = 8;
15171517
MaxStoresPerMemmove = 32;
15181518
MaxStoresPerMemmoveOptSize = 8;
1519-
} else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1519+
} else if (CPUDirective == PPC::DIR_A2) {
15201520
// The A2 also benefits from (very) aggressive inlining of memcpy and
15211521
// friends. The overhead of the function call, even when warm, can be
15221522
// over one hundred cycles.
@@ -1529,6 +1529,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
15291529
MaxLoadsPerMemcmpOptSize = 4;
15301530
}
15311531

1532+
// Enable generation of STXVP instructions by default for mcpu=future.
1533+
if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1534+
DisableAutoPairedVecSt.getNumOccurrences() == 0)
1535+
DisableAutoPairedVecSt = false;
1536+
15321537
IsStrictFPEnabled = true;
15331538

15341539
// Let the subtarget (CPU) decide if a predictable select is more expensive

llvm/test/CodeGen/PowerPC/dmr-spill.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3-
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
3+
; RUN: -ppc-asm-full-reg-names \
44
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
55
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
6-
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
6+
; RUN: -ppc-asm-full-reg-names \
77
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
88
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
9-
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
9+
; RUN: -ppc-asm-full-reg-names \
1010
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
1111

1212
declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>)

llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; This test is a copy of mma-acc-spill.ll except that it uses mcpu=future.
33
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
4-
; RUN: -disable-auto-paired-vec-st=false \
54
; RUN: -mcpu=future -ppc-asm-full-reg-names \
65
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
76
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
8-
; RUN: -disable-auto-paired-vec-st=false \
97
; RUN: -mcpu=future -ppc-asm-full-reg-names \
108
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
119

0 commit comments

Comments
 (0)