Skip to content

Commit fc59f2c

Browse files
authored
[PowerPC] special case small int constant for custom scalar_to_vector (#109850)
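Special-case small integer constants in the PPC custom lowering of scalar_to_vector: when the operand is a constant, the vector element size is at most 32 bits, and the value is within [-16, 15], return a canonical constant splat instead of continuing with the rest of the custom lowering.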
1 parent 54c93aa commit fc59f2c

File tree

7 files changed

+231
-98
lines changed

7 files changed

+231
-98
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11595,6 +11595,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
1159511595

1159611596
MachineFunction &MF = DAG.getMachineFunction();
1159711597
SDValue Op0 = Op.getOperand(0);
11598+
EVT ValVT = Op0.getValueType();
11599+
unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11600+
if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11601+
int64_t IntVal = Op.getConstantOperandVal(0);
11602+
if (IntVal >= -16 && IntVal <= 15)
11603+
return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11604+
dl);
11605+
}
11606+
1159811607
ReuseLoadInfo RLI;
1159911608
if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
1160011609
Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
@@ -11619,7 +11628,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
1161911628
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
1162011629

1162111630
SDValue Val = Op0;
11622-
EVT ValVT = Val.getValueType();
1162311631
// P10 hardware store forwarding requires that a single store contains all
1162411632
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
1162511633
// to avoid load hit store on P10 when running binaries compiled for older

llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll

Lines changed: 30 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,14 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
2626
;
2727
; PWR7-LE-LABEL: build_v2i64_extload_0:
2828
; PWR7-LE: # %bb.0: # %entry
29-
; PWR7-LE-NEXT: li 4, 0
30-
; PWR7-LE-NEXT: stw 4, -16(1)
31-
; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
3229
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
33-
; PWR7-LE-NEXT: addi 3, 1, -16
34-
; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
35-
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
36-
; PWR7-LE-NEXT: xxspltw 35, 0, 1
30+
; PWR7-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
31+
; PWR7-LE-NEXT: xxlxor 36, 36, 36
32+
; PWR7-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
33+
; PWR7-LE-NEXT: xxspltw 34, 0, 1
3734
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
38-
; PWR7-LE-NEXT: xxswapd 34, 1
39-
; PWR7-LE-NEXT: xxswapd 36, 0
40-
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
35+
; PWR7-LE-NEXT: xxswapd 35, 0
36+
; PWR7-LE-NEXT: vperm 2, 4, 2, 3
4137
; PWR7-LE-NEXT: blr
4238
;
4339
; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
357353
;
358354
; PWR7-LE-LABEL: build_v4i32_load_0:
359355
; PWR7-LE: # %bb.0: # %entry
360-
; PWR7-LE-NEXT: li 4, 0
361-
; PWR7-LE-NEXT: stw 4, -16(1)
362-
; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
363356
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
364-
; PWR7-LE-NEXT: addi 3, 1, -16
365-
; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
366-
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
367-
; PWR7-LE-NEXT: xxspltw 35, 0, 1
357+
; PWR7-LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
358+
; PWR7-LE-NEXT: xxlxor 36, 36, 36
359+
; PWR7-LE-NEXT: addi 3, 3, .LCPI8_0@toc@l
360+
; PWR7-LE-NEXT: xxspltw 34, 0, 1
368361
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
369-
; PWR7-LE-NEXT: xxswapd 34, 1
370-
; PWR7-LE-NEXT: xxswapd 36, 0
371-
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
362+
; PWR7-LE-NEXT: xxswapd 35, 0
363+
; PWR7-LE-NEXT: vperm 2, 4, 2, 3
372364
; PWR7-LE-NEXT: blr
373365
;
374366
; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
412404
;
413405
; PWR7-LE-LABEL: build_v4i32_load_1:
414406
; PWR7-LE: # %bb.0: # %entry
415-
; PWR7-LE-NEXT: li 4, 0
416-
; PWR7-LE-NEXT: stw 4, -16(1)
417-
; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
418407
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
419-
; PWR7-LE-NEXT: addi 3, 1, -16
420-
; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
421-
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
422-
; PWR7-LE-NEXT: xxspltw 35, 0, 1
408+
; PWR7-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
409+
; PWR7-LE-NEXT: xxlxor 36, 36, 36
410+
; PWR7-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l
411+
; PWR7-LE-NEXT: xxspltw 34, 0, 1
423412
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
424-
; PWR7-LE-NEXT: xxswapd 34, 1
425-
; PWR7-LE-NEXT: xxswapd 36, 0
426-
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
413+
; PWR7-LE-NEXT: xxswapd 35, 0
414+
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
427415
; PWR7-LE-NEXT: blr
428416
;
429417
; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
469457
;
470458
; PWR7-LE-LABEL: build_v4i32_load_2:
471459
; PWR7-LE: # %bb.0: # %entry
472-
; PWR7-LE-NEXT: li 4, 0
473-
; PWR7-LE-NEXT: stw 4, -16(1)
474-
; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
475460
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
476-
; PWR7-LE-NEXT: addi 3, 1, -16
477-
; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
478-
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
479-
; PWR7-LE-NEXT: xxspltw 35, 0, 1
461+
; PWR7-LE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
462+
; PWR7-LE-NEXT: xxlxor 36, 36, 36
463+
; PWR7-LE-NEXT: addi 3, 3, .LCPI10_0@toc@l
464+
; PWR7-LE-NEXT: xxspltw 34, 0, 1
480465
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
481-
; PWR7-LE-NEXT: xxswapd 34, 1
482-
; PWR7-LE-NEXT: xxswapd 36, 0
483-
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
466+
; PWR7-LE-NEXT: xxswapd 35, 0
467+
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
484468
; PWR7-LE-NEXT: blr
485469
;
486470
; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
524508
;
525509
; PWR7-LE-LABEL: build_v4i32_load_3:
526510
; PWR7-LE: # %bb.0: # %entry
527-
; PWR7-LE-NEXT: li 4, 0
528-
; PWR7-LE-NEXT: stw 4, -16(1)
529-
; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
530511
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
531-
; PWR7-LE-NEXT: addi 3, 1, -16
532-
; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
533-
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
534-
; PWR7-LE-NEXT: xxspltw 35, 0, 1
512+
; PWR7-LE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
513+
; PWR7-LE-NEXT: xxlxor 36, 36, 36
514+
; PWR7-LE-NEXT: addi 3, 3, .LCPI11_0@toc@l
515+
; PWR7-LE-NEXT: xxspltw 34, 0, 1
535516
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
536-
; PWR7-LE-NEXT: xxswapd 34, 1
537-
; PWR7-LE-NEXT: xxswapd 36, 0
538-
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
517+
; PWR7-LE-NEXT: xxswapd 35, 0
518+
; PWR7-LE-NEXT: vperm 2, 2, 4, 3
539519
; PWR7-LE-NEXT: blr
540520
;
541521
; PWR8-LE-LABEL: build_v4i32_load_3:

llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
849849
;
850850
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
851851
; P8-AIX-32: # %bb.0: # %entry
852-
; P8-AIX-32-NEXT: li r5, 0
853852
; P8-AIX-32-NEXT: slwi r4, r4, 2
854-
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
855-
; P8-AIX-32-NEXT: stw r5, -16(r1)
853+
; P8-AIX-32-NEXT: xxlxor v2, v2, v2
856854
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
857-
; P8-AIX-32-NEXT: addi r3, r1, -16
858-
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
859855
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
860-
; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
861-
; P8-AIX-32-NEXT: vmrghb v2, v2, v3
856+
; P8-AIX-32-NEXT: xxmrghw v3, v2, vs0
857+
; P8-AIX-32-NEXT: vmrghb v2, v3, v2
862858
; P8-AIX-32-NEXT: blr
863859
entry:
864860
%idx.ext = sext i32 %offset to i64
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
3+
; RUN: -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \
4+
; RUN: --check-prefix=PWR7-BE %s
5+
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
6+
; RUN: -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \
7+
; RUN: --check-prefix=PWR8-BE %s
8+
; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
9+
; RUN: -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \
10+
; RUN: --check-prefix=PWR8-LE %s
11+
12+
define <16 x i8> @i8(ptr nocapture noundef readonly %p) {
13+
; PWR7-BE-LABEL: i8:
14+
; PWR7-BE: # %bb.0: # %entry
15+
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
16+
; PWR7-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
17+
; PWR7-BE-NEXT: vspltisb v2, 10
18+
; PWR7-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
19+
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
20+
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
21+
; PWR7-BE-NEXT: blr
22+
;
23+
; PWR8-BE-LABEL: i8:
24+
; PWR8-BE: # %bb.0: # %entry
25+
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
26+
; PWR8-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
27+
; PWR8-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
28+
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
29+
; PWR8-BE-NEXT: li r3, 10
30+
; PWR8-BE-NEXT: mtvsrwz v4, r3
31+
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
32+
; PWR8-BE-NEXT: blr
33+
;
34+
; PWR8-LE-LABEL: i8:
35+
; PWR8-LE: # %bb.0: # %entry
36+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
37+
; PWR8-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
38+
; PWR8-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l
39+
; PWR8-LE-NEXT: xxswapd v2, vs0
40+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
41+
; PWR8-LE-NEXT: li r3, 10
42+
; PWR8-LE-NEXT: mtvsrd v4, r3
43+
; PWR8-LE-NEXT: xxswapd v3, vs0
44+
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
45+
; PWR8-LE-NEXT: blr
46+
entry:
47+
%0 = load <16 x i8>, ptr %p, align 16
48+
%vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1
49+
ret <16 x i8> %vecinit1
50+
}
51+
52+
define <8 x i16> @i16(ptr nocapture noundef readonly %p) {
53+
; PWR7-BE-LABEL: i16:
54+
; PWR7-BE: # %bb.0: # %entry
55+
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
56+
; PWR7-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
57+
; PWR7-BE-NEXT: vspltish v2, 9
58+
; PWR7-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
59+
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
60+
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
61+
; PWR7-BE-NEXT: blr
62+
;
63+
; PWR8-BE-LABEL: i16:
64+
; PWR8-BE: # %bb.0: # %entry
65+
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
66+
; PWR8-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
67+
; PWR8-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
68+
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
69+
; PWR8-BE-NEXT: li r3, 9
70+
; PWR8-BE-NEXT: mtvsrwz v4, r3
71+
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
72+
; PWR8-BE-NEXT: blr
73+
;
74+
; PWR8-LE-LABEL: i16:
75+
; PWR8-LE: # %bb.0: # %entry
76+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
77+
; PWR8-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
78+
; PWR8-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l
79+
; PWR8-LE-NEXT: xxswapd v2, vs0
80+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
81+
; PWR8-LE-NEXT: li r3, 9
82+
; PWR8-LE-NEXT: mtvsrd v4, r3
83+
; PWR8-LE-NEXT: xxswapd v3, vs0
84+
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
85+
; PWR8-LE-NEXT: blr
86+
entry:
87+
%0 = load <8 x i16>, ptr %p, align 16
88+
%vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1
89+
ret <8 x i16> %vecinit1
90+
}
91+
92+
define <4 x i32> @i32(ptr nocapture noundef readonly %p) {
93+
; PWR7-BE-LABEL: i32:
94+
; PWR7-BE: # %bb.0: # %entry
95+
; PWR7-BE-NEXT: lxvw4x v3, 0, r3
96+
; PWR7-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
97+
; PWR7-BE-NEXT: vspltisw v2, 7
98+
; PWR7-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
99+
; PWR7-BE-NEXT: lxvw4x v4, 0, r3
100+
; PWR7-BE-NEXT: vperm v2, v3, v2, v4
101+
; PWR7-BE-NEXT: blr
102+
;
103+
; PWR8-BE-LABEL: i32:
104+
; PWR8-BE: # %bb.0: # %entry
105+
; PWR8-BE-NEXT: lxvw4x v2, 0, r3
106+
; PWR8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
107+
; PWR8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
108+
; PWR8-BE-NEXT: lxvw4x v3, 0, r3
109+
; PWR8-BE-NEXT: li r3, 7
110+
; PWR8-BE-NEXT: mtvsrwz v4, r3
111+
; PWR8-BE-NEXT: vperm v2, v2, v4, v3
112+
; PWR8-BE-NEXT: blr
113+
;
114+
; PWR8-LE-LABEL: i32:
115+
; PWR8-LE: # %bb.0: # %entry
116+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
117+
; PWR8-LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
118+
; PWR8-LE-NEXT: addi r3, r3, .LCPI2_0@toc@l
119+
; PWR8-LE-NEXT: xxswapd v2, vs0
120+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
121+
; PWR8-LE-NEXT: li r3, 7
122+
; PWR8-LE-NEXT: mtvsrwz v4, r3
123+
; PWR8-LE-NEXT: xxswapd v3, vs0
124+
; PWR8-LE-NEXT: vperm v2, v4, v2, v3
125+
; PWR8-LE-NEXT: blr
126+
entry:
127+
%0 = load <4 x i32>, ptr %p, align 16
128+
%vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1
129+
ret <4 x i32> %vecinit1
130+
}
131+
132+
define <2 x i64> @i64(ptr nocapture noundef readonly %p) {
133+
; PWR7-BE-LABEL: i64:
134+
; PWR7-BE: # %bb.0: # %entry
135+
; PWR7-BE-NEXT: lxvd2x v2, 0, r3
136+
; PWR7-BE-NEXT: li r3, 10
137+
; PWR7-BE-NEXT: std r3, -16(r1)
138+
; PWR7-BE-NEXT: std r3, -8(r1)
139+
; PWR7-BE-NEXT: addi r3, r1, -16
140+
; PWR7-BE-NEXT: lxvd2x v3, 0, r3
141+
; PWR7-BE-NEXT: xxmrghd v2, v2, v3
142+
; PWR7-BE-NEXT: blr
143+
;
144+
; PWR8-BE-LABEL: i64:
145+
; PWR8-BE: # %bb.0: # %entry
146+
; PWR8-BE-NEXT: lxvd2x v2, 0, r3
147+
; PWR8-BE-NEXT: li r3, 10
148+
; PWR8-BE-NEXT: mtfprd f0, r3
149+
; PWR8-BE-NEXT: xxmrghd v2, v2, vs0
150+
; PWR8-BE-NEXT: blr
151+
;
152+
; PWR8-LE-LABEL: i64:
153+
; PWR8-LE: # %bb.0: # %entry
154+
; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
155+
; PWR8-LE-NEXT: li r3, 10
156+
; PWR8-LE-NEXT: xxswapd v2, vs0
157+
; PWR8-LE-NEXT: mtfprd f0, r3
158+
; PWR8-LE-NEXT: xxpermdi v2, vs0, v2, 1
159+
; PWR8-LE-NEXT: blr
160+
entry:
161+
%0 = load <2 x i64>, ptr %p, align 16
162+
%vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1
163+
ret <2 x i64> %vecinit1
164+
}

llvm/test/CodeGen/PowerPC/load-and-splat.ll

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -356,39 +356,31 @@ define void @test6(ptr %a, ptr %in) {
356356
;
357357
; P9-AIX32-LABEL: test6:
358358
; P9-AIX32: # %bb.0: # %entry
359-
; P9-AIX32-NEXT: li r5, 0
360-
; P9-AIX32-NEXT: stw r5, -16(r1)
361359
; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
362360
; P9-AIX32-NEXT: lxvwsx vs1, 0, r4
363-
; P9-AIX32-NEXT: lxv vs2, -16(r1)
361+
; P9-AIX32-NEXT: xxlxor vs2, vs2, vs2
364362
; P9-AIX32-NEXT: lxv vs0, 0(r5)
365363
; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
366364
; P9-AIX32-NEXT: stxv vs1, 0(r3)
367365
; P9-AIX32-NEXT: blr
368366
;
369367
; P8-AIX32-LABEL: test6:
370368
; P8-AIX32: # %bb.0: # %entry
371-
; P8-AIX32-NEXT: li r5, 0
372-
; P8-AIX32-NEXT: stw r5, -16(r1)
373369
; P8-AIX32-NEXT: lfiwzx f0, 0, r4
374370
; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
371+
; P8-AIX32-NEXT: xxlxor v4, v4, v4
375372
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
376-
; P8-AIX32-NEXT: addi r4, r1, -16
377-
; P8-AIX32-NEXT: lxvw4x v4, 0, r4
378373
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
379374
; P8-AIX32-NEXT: vperm v2, v4, v2, v3
380375
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
381376
; P8-AIX32-NEXT: blr
382377
;
383378
; P7-AIX32-LABEL: test6:
384379
; P7-AIX32: # %bb.0: # %entry
385-
; P7-AIX32-NEXT: li r5, 0
386-
; P7-AIX32-NEXT: stw r5, -16(r1)
387380
; P7-AIX32-NEXT: lfiwzx f0, 0, r4
388381
; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
382+
; P7-AIX32-NEXT: xxlxor v4, v4, v4
389383
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
390-
; P7-AIX32-NEXT: addi r4, r1, -16
391-
; P7-AIX32-NEXT: lxvw4x v4, 0, r4
392384
; P7-AIX32-NEXT: xxspltw v2, vs0, 1
393385
; P7-AIX32-NEXT: vperm v2, v4, v2, v3
394386
; P7-AIX32-NEXT: stxvw4x v2, 0, r3

0 commit comments

Comments
 (0)