Skip to content

[PowerPC] special case small int constant for custom scalar_to_vector #109850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11593,6 +11593,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,

MachineFunction &MF = DAG.getMachineFunction();
SDValue Op0 = Op.getOperand(0);
EVT ValVT = Op0.getValueType();
unsigned EltSize = Op.getValueType().getScalarSizeInBits();
if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
int64_t IntVal = Op.getConstantOperandVal(0);
if (IntVal >= -16 && IntVal <= 15)
return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
dl);
}

ReuseLoadInfo RLI;
if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
Expand All @@ -11617,7 +11626,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

SDValue Val = Op0;
EVT ValVT = Val.getValueType();
// P10 hardware store forwarding requires that a single store contains all
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
// to avoid load hit store on P10 when running binaries compiled for older
Expand Down
80 changes: 30 additions & 50 deletions llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,14 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v2i64_extload_0:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; PWR7-LE-NEXT: xxlxor 36, 36, 36
; PWR7-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
; PWR7-LE-NEXT: xxswapd 35, 0
; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
Expand Down Expand Up @@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_0:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; PWR7-LE-NEXT: xxlxor 36, 36, 36
; PWR7-LE-NEXT: addi 3, 3, .LCPI8_0@toc@l
; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
; PWR7-LE-NEXT: xxswapd 35, 0
; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_0:
Expand Down Expand Up @@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_1:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; PWR7-LE-NEXT: xxlxor 36, 36, 36
; PWR7-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l
; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: xxswapd 35, 0
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_1:
Expand Down Expand Up @@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_2:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
; PWR7-LE-NEXT: xxlxor 36, 36, 36
; PWR7-LE-NEXT: addi 3, 3, .LCPI10_0@toc@l
; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: xxswapd 35, 0
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_2:
Expand Down Expand Up @@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_3:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
; PWR7-LE-NEXT: xxlxor 36, 36, 36
; PWR7-LE-NEXT: addi 3, 3, .LCPI11_0@toc@l
; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: xxswapd 35, 0
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_3:
Expand Down
10 changes: 3 additions & 7 deletions llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
Original file line number Diff line number Diff line change
Expand Up @@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
;
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
; P8-AIX-32: # %bb.0: # %entry
; P8-AIX-32-NEXT: li r5, 0
; P8-AIX-32-NEXT: slwi r4, r4, 2
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
; P8-AIX-32-NEXT: stw r5, -16(r1)
; P8-AIX-32-NEXT: xxlxor v2, v2, v2
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
; P8-AIX-32-NEXT: vmrghb v2, v2, v3
; P8-AIX-32-NEXT: xxmrghw v3, v2, vs0
; P8-AIX-32-NEXT: vmrghb v2, v3, v2
; P8-AIX-32-NEXT: blr
entry:
%idx.ext = sext i32 %offset to i64
Expand Down
164 changes: 164 additions & 0 deletions llvm/test/CodeGen/PowerPC/const-stov.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
; RUN: -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \
; RUN: --check-prefix=PWR7-BE %s
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
; RUN: -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \
; RUN: --check-prefix=PWR8-BE %s
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \
; RUN: --check-prefix=PWR8-LE %s

; Inserts the constant i8 10 into element 1 of a <16 x i8> loaded from %p.
; In the expected output below, the PWR7-BE run materializes the constant with
; a splat-immediate (vspltisb) and blends it into the loaded vector with vperm,
; while both PWR8 runs move the constant from a GPR (li followed by
; mtvsrwz/mtvsrd) before the vperm.
define <16 x i8> @i8(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: i8:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
; PWR7-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; PWR7-BE-NEXT: vspltisb v2, 10
; PWR7-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: i8:
; PWR8-BE: # %bb.0: # %entry
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
; PWR8-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; PWR8-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
; PWR8-BE-NEXT: li r3, 10
; PWR8-BE-NEXT: mtvsrwz v4, r3
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
; PWR8-BE-NEXT: blr
;
; PWR8-LE-LABEL: i8:
; PWR8-LE: # %bb.0: # %entry
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; PWR8-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; PWR8-LE-NEXT: xxswapd v2, vs0
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: li r3, 10
; PWR8-LE-NEXT: mtvsrd v4, r3
; PWR8-LE-NEXT: xxswapd v3, vs0
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
; PWR8-LE-NEXT: blr
entry:
%0 = load <16 x i8>, ptr %p, align 16
%vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1
ret <16 x i8> %vecinit1
}

; Inserts the constant i16 9 into element 1 of an <8 x i16> loaded from %p.
; In the expected output below, the PWR7-BE run materializes the constant with
; a splat-immediate (vspltish) and blends it into the loaded vector with vperm,
; while both PWR8 runs move the constant from a GPR (li followed by
; mtvsrwz/mtvsrd) before the vperm.
define <8 x i16> @i16(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: i16:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
; PWR7-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; PWR7-BE-NEXT: vspltish v2, 9
; PWR7-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: i16:
; PWR8-BE: # %bb.0: # %entry
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
; PWR8-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; PWR8-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
; PWR8-BE-NEXT: li r3, 9
; PWR8-BE-NEXT: mtvsrwz v4, r3
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
; PWR8-BE-NEXT: blr
;
; PWR8-LE-LABEL: i16:
; PWR8-LE: # %bb.0: # %entry
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; PWR8-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; PWR8-LE-NEXT: xxswapd v2, vs0
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: li r3, 9
; PWR8-LE-NEXT: mtvsrd v4, r3
; PWR8-LE-NEXT: xxswapd v3, vs0
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
; PWR8-LE-NEXT: blr
entry:
%0 = load <8 x i16>, ptr %p, align 16
%vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1
ret <8 x i16> %vecinit1
}

; Inserts the constant i32 7 into element 1 of a <4 x i32> loaded from %p.
; In the expected output below, the PWR7-BE run materializes the constant with
; a splat-immediate (vspltisw) and blends it into the loaded vector with vperm,
; while both PWR8 runs move the constant from a GPR (li followed by mtvsrwz)
; before the vperm.
define <4 x i32> @i32(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: i32:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
; PWR7-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; PWR7-BE-NEXT: vspltisw v2, 7
; PWR7-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: i32:
; PWR8-BE: # %bb.0: # %entry
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
; PWR8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; PWR8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
; PWR8-BE-NEXT: li r3, 7
; PWR8-BE-NEXT: mtvsrwz v4, r3
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
; PWR8-BE-NEXT: blr
;
; PWR8-LE-LABEL: i32:
; PWR8-LE: # %bb.0: # %entry
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; PWR8-LE-NEXT: addi r3, r3, .LCPI2_0@toc@l
; PWR8-LE-NEXT: xxswapd v2, vs0
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: li r3, 7
; PWR8-LE-NEXT: mtvsrwz v4, r3
; PWR8-LE-NEXT: xxswapd v3, vs0
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
; PWR8-LE-NEXT: blr
entry:
%0 = load <4 x i32>, ptr %p, align 16
%vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1
ret <4 x i32> %vecinit1
}

; Inserts the constant i64 10 into element 1 of a <2 x i64> loaded from %p.
; Unlike the narrower element cases in this file, none of the expected outputs
; below uses a splat-immediate instruction: the PWR7-BE run stores the constant
; to the stack twice and reloads it as a vector before xxmrghd, and both PWR8
; runs move it from a GPR with mtfprd before merging/permuting doublewords.
define <2 x i64> @i64(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: i64:
; PWR7-BE: # %bb.0: # %entry
; PWR7-BE-NEXT: lxvd2x v2, 0, r3
; PWR7-BE-NEXT: li r3, 10
; PWR7-BE-NEXT: std r3, -16(r1)
; PWR7-BE-NEXT: std r3, -8(r1)
; PWR7-BE-NEXT: addi r3, r1, -16
; PWR7-BE-NEXT: lxvd2x v3, 0, r3
; PWR7-BE-NEXT: xxmrghd v2, v2, v3
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: i64:
; PWR8-BE: # %bb.0: # %entry
; PWR8-BE-NEXT: lxvd2x v2, 0, r3
; PWR8-BE-NEXT: li r3, 10
; PWR8-BE-NEXT: mtfprd f0, r3
; PWR8-BE-NEXT: xxmrghd v2, v2, vs0
; PWR8-BE-NEXT: blr
;
; PWR8-LE-LABEL: i64:
; PWR8-LE: # %bb.0: # %entry
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
; PWR8-LE-NEXT: li r3, 10
; PWR8-LE-NEXT: xxswapd v2, vs0
; PWR8-LE-NEXT: mtfprd f0, r3
; PWR8-LE-NEXT: xxpermdi v2, vs0, v2, 1
; PWR8-LE-NEXT: blr
entry:
%0 = load <2 x i64>, ptr %p, align 16
%vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1
ret <2 x i64> %vecinit1
}
14 changes: 3 additions & 11 deletions llvm/test/CodeGen/PowerPC/load-and-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -356,39 +356,31 @@ define void @test6(ptr %a, ptr %in) {
;
; P9-AIX32-LABEL: test6:
; P9-AIX32: # %bb.0: # %entry
; P9-AIX32-NEXT: li r5, 0
; P9-AIX32-NEXT: stw r5, -16(r1)
; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
; P9-AIX32-NEXT: lxvwsx vs1, 0, r4
; P9-AIX32-NEXT: lxv vs2, -16(r1)
; P9-AIX32-NEXT: xxlxor vs2, vs2, vs2
; P9-AIX32-NEXT: lxv vs0, 0(r5)
; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
; P9-AIX32-NEXT: stxv vs1, 0(r3)
; P9-AIX32-NEXT: blr
;
; P8-AIX32-LABEL: test6:
; P8-AIX32: # %bb.0: # %entry
; P8-AIX32-NEXT: li r5, 0
; P8-AIX32-NEXT: stw r5, -16(r1)
; P8-AIX32-NEXT: lfiwzx f0, 0, r4
; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
; P8-AIX32-NEXT: xxlxor v4, v4, v4
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
; P8-AIX32-NEXT: addi r4, r1, -16
; P8-AIX32-NEXT: lxvw4x v4, 0, r4
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
; P8-AIX32-NEXT: vperm v2, v4, v2, v3
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
; P8-AIX32-NEXT: blr
;
; P7-AIX32-LABEL: test6:
; P7-AIX32: # %bb.0: # %entry
; P7-AIX32-NEXT: li r5, 0
; P7-AIX32-NEXT: stw r5, -16(r1)
; P7-AIX32-NEXT: lfiwzx f0, 0, r4
; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
; P7-AIX32-NEXT: xxlxor v4, v4, v4
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
; P7-AIX32-NEXT: addi r4, r1, -16
; P7-AIX32-NEXT: lxvw4x v4, 0, r4
; P7-AIX32-NEXT: xxspltw v2, vs0, 1
; P7-AIX32-NEXT: vperm v2, v4, v2, v3
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
Expand Down
Loading
Loading