-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PowerPC] special case small int constant for custom scalar_to_vector #109850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-powerpc Author: None (RolandF77) Changes: Special-case small integer constants in the PPC custom lowering of scalar_to_vector. Full diff: https://github.com/llvm/llvm-project/pull/109850.diff 7 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d9847a21489e63..a23a77025143ae 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11593,6 +11593,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op0 = Op.getOperand(0);
+ EVT ValVT = Op0.getValueType();
+ unsigned EltSize = Op.getValueType().getScalarSizeInBits();
+ if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
+ int64_t IntVal = Op.getConstantOperandVal(0);
+ if (IntVal >= -16 && IntVal <= 15)
+ return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(),
+ DAG, dl);
+ }
+
ReuseLoadInfo RLI;
if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
@@ -11617,7 +11626,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Val = Op0;
- EVT ValVT = Val.getValueType();
// P10 hardware store forwarding requires that a single store contains all
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
// to avoid load hit store on P10 when running binaries compiled for older
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index fba6725e2b2a3f..2259b6e0f44df6 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -26,18 +26,14 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v2i64_extload_0:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_0:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI8_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_1:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_2:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_3:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI11_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index e1159e56e23ebe..7f6fdc7f88cd11 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
;
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: li r5, 0
; P8-AIX-32-NEXT: slwi r4, r4, 2
-; P8-AIX-32-NEXT: xxlxor v3, v3, v3
-; P8-AIX-32-NEXT: stw r5, -16(r1)
+; P8-AIX-32-NEXT: xxlxor v2, v2, v2
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
-; P8-AIX-32-NEXT: addi r3, r1, -16
-; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
-; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
-; P8-AIX-32-NEXT: vmrghb v2, v2, v3
+; P8-AIX-32-NEXT: xxmrghw v3, v2, vs0
+; P8-AIX-32-NEXT: vmrghb v2, v3, v2
; P8-AIX-32-NEXT: blr
entry:
%idx.ext = sext i32 %offset to i64
diff --git a/llvm/test/CodeGen/PowerPC/const-stov.ll b/llvm/test/CodeGen/PowerPC/const-stov.ll
new file mode 100644
index 00000000000000..69c68a4f27371e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/const-stov.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \
+; RUN: --check-prefix=PWR7-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \
+; RUN: --check-prefix=PWR8-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \
+; RUN: --check-prefix=PWR8-LE %s
+
+define <16 x i8> @i8(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i8:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR7-BE-NEXT: vspltisb v2, 10
+; PWR7-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i8:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 10
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i8:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 10
+; PWR8-LE-NEXT: mtvsrd v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <16 x i8>, ptr %p, align 16
+ %vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1
+ ret <16 x i8> %vecinit1
+}
+
+define <8 x i16> @i16(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i16:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR7-BE-NEXT: vspltish v2, 9
+; PWR7-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i16:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 9
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i16:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 9
+; PWR8-LE-NEXT: mtvsrd v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <8 x i16>, ptr %p, align 16
+ %vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1
+ ret <8 x i16> %vecinit1
+}
+
+define <4 x i32> @i32(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i32:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR7-BE-NEXT: vspltisw v2, 7
+; PWR7-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i32:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 7
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i32:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 7
+; PWR8-LE-NEXT: mtvsrwz v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <4 x i32>, ptr %p, align 16
+ %vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1
+ ret <4 x i32> %vecinit1
+}
+
+define <2 x i64> @i64(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i64:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvd2x v2, 0, r3
+; PWR7-BE-NEXT: li r3, 10
+; PWR7-BE-NEXT: std r3, -16(r1)
+; PWR7-BE-NEXT: std r3, -8(r1)
+; PWR7-BE-NEXT: addi r3, r1, -16
+; PWR7-BE-NEXT: lxvd2x v3, 0, r3
+; PWR7-BE-NEXT: xxmrghd v2, v2, v3
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i64:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvd2x v2, 0, r3
+; PWR8-BE-NEXT: li r3, 10
+; PWR8-BE-NEXT: mtfprd f0, r3
+; PWR8-BE-NEXT: xxmrghd v2, v2, vs0
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i64:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 10
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: mtfprd f0, r3
+; PWR8-LE-NEXT: xxpermdi v2, vs0, v2, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <2 x i64>, ptr %p, align 16
+ %vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1
+ ret <2 x i64> %vecinit1
+}
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index c9ee3a51f41724..1993b1678b3ea4 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -356,11 +356,9 @@ define void @test6(ptr %a, ptr %in) {
;
; P9-AIX32-LABEL: test6:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: li r5, 0
-; P9-AIX32-NEXT: stw r5, -16(r1)
; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
; P9-AIX32-NEXT: lxvwsx vs1, 0, r4
-; P9-AIX32-NEXT: lxv vs2, -16(r1)
+; P9-AIX32-NEXT: xxlxor vs2, vs2, vs2
; P9-AIX32-NEXT: lxv vs0, 0(r5)
; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
; P9-AIX32-NEXT: stxv vs1, 0(r3)
@@ -368,13 +366,10 @@ define void @test6(ptr %a, ptr %in) {
;
; P8-AIX32-LABEL: test6:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: li r5, 0
-; P8-AIX32-NEXT: stw r5, -16(r1)
; P8-AIX32-NEXT: lfiwzx f0, 0, r4
; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
+; P8-AIX32-NEXT: xxlxor v4, v4, v4
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -16
-; P8-AIX32-NEXT: lxvw4x v4, 0, r4
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
; P8-AIX32-NEXT: vperm v2, v4, v2, v3
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
@@ -382,13 +377,10 @@ define void @test6(ptr %a, ptr %in) {
;
; P7-AIX32-LABEL: test6:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: li r5, 0
-; P7-AIX32-NEXT: stw r5, -16(r1)
; P7-AIX32-NEXT: lfiwzx f0, 0, r4
; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
+; P7-AIX32-NEXT: xxlxor v4, v4, v4
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
-; P7-AIX32-NEXT: addi r4, r1, -16
-; P7-AIX32-NEXT: lxvw4x v4, 0, r4
; P7-AIX32-NEXT: xxspltw v2, vs0, 1
; P7-AIX32-NEXT: vperm v2, v4, v2, v3
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
index ad6a576fbf50ef..04e7110b669a9d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
@@ -60,15 +60,13 @@ define hidden void @function1() {
; CHECK-LINUX-32: # %bb.0: # %entry
; CHECK-LINUX-32-NEXT: mflr r0
; CHECK-LINUX-32-NEXT: stw r0, 4(r1)
-; CHECK-LINUX-32-NEXT: stwu r1, -48(r1)
-; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LINUX-32-NEXT: stwu r1, -32(r1)
+; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-LINUX-32-NEXT: .cfi_offset lr, 4
; CHECK-LINUX-32-NEXT: bl call1
-; CHECK-LINUX-32-NEXT: li r4, 0
; CHECK-LINUX-32-NEXT: stw r3, 16(r1)
-; CHECK-LINUX-32-NEXT: stw r4, 32(r1)
-; CHECK-LINUX-32-NEXT: lwz r0, 52(r1)
-; CHECK-LINUX-32-NEXT: addi r1, r1, 48
+; CHECK-LINUX-32-NEXT: lwz r0, 36(r1)
+; CHECK-LINUX-32-NEXT: addi r1, r1, 32
; CHECK-LINUX-32-NEXT: mtlr r0
; CHECK-LINUX-32-NEXT: blr
;
@@ -76,13 +74,11 @@ define hidden void @function1() {
; CHECK-AIX-32: # %bb.0: # %entry
; CHECK-AIX-32-NEXT: mflr r0
; CHECK-AIX-32-NEXT: stw r0, 8(r1)
-; CHECK-AIX-32-NEXT: stwu r1, -96(r1)
+; CHECK-AIX-32-NEXT: stwu r1, -80(r1)
; CHECK-AIX-32-NEXT: bl .call1[PR]
; CHECK-AIX-32-NEXT: nop
-; CHECK-AIX-32-NEXT: li r4, 0
; CHECK-AIX-32-NEXT: stw r3, 64(r1)
-; CHECK-AIX-32-NEXT: stw r4, 80(r1)
-; CHECK-AIX-32-NEXT: addi r1, r1, 96
+; CHECK-AIX-32-NEXT: addi r1, r1, 80
; CHECK-AIX-32-NEXT: lwz r0, 8(r1)
; CHECK-AIX-32-NEXT: mtlr r0
; CHECK-AIX-32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
index 0171e27e80901d..35b478017383fa 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
@@ -8,30 +8,27 @@
define dso_local fastcc void @BuildVectorICE() unnamed_addr {
; 32BIT-LABEL: BuildVectorICE:
; 32BIT: # %bb.0: # %entry
-; 32BIT-NEXT: stwu 1, -64(1)
-; 32BIT-NEXT: .cfi_def_cfa_offset 64
-; 32BIT-NEXT: li 4, .LCPI0_0@l
-; 32BIT-NEXT: lis 5, .LCPI0_0@ha
+; 32BIT-NEXT: stwu 1, -48(1)
+; 32BIT-NEXT: .cfi_def_cfa_offset 48
; 32BIT-NEXT: lxvw4x 34, 0, 3
-; 32BIT-NEXT: li 3, 0
-; 32BIT-NEXT: addi 6, 1, 48
-; 32BIT-NEXT: li 7, 0
-; 32BIT-NEXT: lxvw4x 35, 5, 4
+; 32BIT-NEXT: li 3, .LCPI0_0@l
+; 32BIT-NEXT: lis 4, .LCPI0_0@ha
+; 32BIT-NEXT: li 5, 0
+; 32BIT-NEXT: xxlxor 36, 36, 36
+; 32BIT-NEXT: lxvw4x 35, 4, 3
+; 32BIT-NEXT: addi 3, 1, 16
; 32BIT-NEXT: addi 4, 1, 32
-; 32BIT-NEXT: addi 5, 1, 16
; 32BIT-NEXT: .p2align 4
; 32BIT-NEXT: .LBB0_1: # %while.body
; 32BIT-NEXT: #
-; 32BIT-NEXT: stw 3, 32(1)
-; 32BIT-NEXT: stw 7, 16(1)
-; 32BIT-NEXT: lxvw4x 36, 0, 4
-; 32BIT-NEXT: lxvw4x 37, 0, 5
-; 32BIT-NEXT: vperm 4, 5, 4, 3
-; 32BIT-NEXT: vadduwm 4, 2, 4
-; 32BIT-NEXT: xxspltw 37, 36, 1
-; 32BIT-NEXT: vadduwm 4, 4, 5
-; 32BIT-NEXT: stxvw4x 36, 0, 6
-; 32BIT-NEXT: lwz 7, 48(1)
+; 32BIT-NEXT: stw 5, 16(1)
+; 32BIT-NEXT: lxvw4x 37, 0, 3
+; 32BIT-NEXT: vperm 5, 5, 4, 3
+; 32BIT-NEXT: vadduwm 5, 2, 5
+; 32BIT-NEXT: xxspltw 32, 37, 1
+; 32BIT-NEXT: vadduwm 5, 5, 0
+; 32BIT-NEXT: stxvw4x 37, 0, 4
+; 32BIT-NEXT: lwz 5, 32(1)
; 32BIT-NEXT: b .LBB0_1
;
; 64BIT-LABEL: BuildVectorICE:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be a dumb question, but is it accurate to specifically say P7 in the title, when it affects more than just P7 (at least that is what it looks like from the tests)?
The title mentions pwr7 because that is the interesting case. Currently this path is also hit on pwr8+ in 32-bit mode, but that is a bug - scalar_to_vector should really be legal there. Processors older than pwr7 that have vector support will also hit it for some types, but they are not really interesting for performance. pwr7 matters mostly because ISVs build for it and then run on something newer. I can change the title if you like, but that's why it says pwr7. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for answering my question and updating the title of the PR, Roland. I think LGTM.
Special-case small integer constants (values in the range [-16, 15] with an element size of at most 32 bits) in the PPC custom lowering of scalar_to_vector. The operation is legal for 64-bit pwr8+, and could probably be made legal for 32-bit pwr8+ if the patterns were changed to avoid using 64-bit instructions, but -mcpu=pwr7 is still in use and pwr7 lacks move-to-VSR instructions, so this is of particular interest for pwr7.