Skip to content

Commit 26ba186

Browse files
authored
[PowerPC] Improve pwr7 codegen for v4i8 load (#104507)
There are no partial vector loads on pwr7, so the current v4i8 codegen is an integer load, followed by a store to a vector-sized temporary and a reload as a vector. Instead, try to use lfiwax to load 32 bits into an FP register and take advantage of the register sharing between VSX FP and vector registers to move the result to the right vector position.
1 parent 32bc670 commit 26ba186

11 files changed

+303
-465
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11492,13 +11492,33 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
1149211492
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
1149311493
SelectionDAG &DAG) const {
1149411494
SDLoc dl(Op);
11495+
11496+
MachineFunction &MF = DAG.getMachineFunction();
11497+
SDValue Op0 = Op.getOperand(0);
11498+
ReuseLoadInfo RLI;
11499+
if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11500+
Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11501+
Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11502+
canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11503+
11504+
MachineMemOperand *MMO =
11505+
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11506+
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11507+
SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11508+
SDValue Bits = DAG.getMemIntrinsicNode(
11509+
PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11510+
MVT::i32, MMO);
11511+
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
11512+
return Bits.getValue(0);
11513+
}
11514+
1149511515
// Create a stack slot that is 16-byte aligned.
11496-
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11516+
MachineFrameInfo &MFI = MF.getFrameInfo();
1149711517
int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
1149811518
EVT PtrVT = getPointerTy(DAG.getDataLayout());
1149911519
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
1150011520

11501-
SDValue Val = Op.getOperand(0);
11521+
SDValue Val = Op0;
1150211522
EVT ValVT = Val.getValueType();
1150311523
// P10 hardware store forwarding requires that a single store contains all
1150411524
// the data for the load. P10 is able to merge a pair of adjacent stores. Try

llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll

Lines changed: 44 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,17 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
2727
; PWR7-LE-LABEL: build_v2i64_extload_0:
2828
; PWR7-LE: # %bb.0: # %entry
2929
; PWR7-LE-NEXT: li 4, 0
30-
; PWR7-LE-NEXT: lwz 3, 0(3)
3130
; PWR7-LE-NEXT: stw 4, -16(1)
3231
; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
32+
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
33+
; PWR7-LE-NEXT: addi 3, 1, -16
3334
; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
34-
; PWR7-LE-NEXT: stw 3, -32(1)
35-
; PWR7-LE-NEXT: addi 3, 1, -32
36-
; PWR7-LE-NEXT: lxvd2x 0, 0, 4
37-
; PWR7-LE-NEXT: addi 4, 1, -16
3835
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
39-
; PWR7-LE-NEXT: xxswapd 34, 0
36+
; PWR7-LE-NEXT: xxspltw 35, 0, 1
4037
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
41-
; PWR7-LE-NEXT: xxswapd 35, 1
38+
; PWR7-LE-NEXT: xxswapd 34, 1
4239
; PWR7-LE-NEXT: xxswapd 36, 0
43-
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
40+
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
4441
; PWR7-LE-NEXT: blr
4542
;
4643
; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -337,17 +334,13 @@ entry:
337334
define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
338335
; PWR7-BE-LABEL: build_v4i32_load_0:
339336
; PWR7-BE: # %bb.0: # %entry
340-
; PWR7-BE-NEXT: lwz 3, 0(3)
341-
; PWR7-BE-NEXT: xxlxor 36, 36, 36
342-
; PWR7-BE-NEXT: sldi 3, 3, 32
343-
; PWR7-BE-NEXT: std 3, -32(1)
344-
; PWR7-BE-NEXT: std 3, -24(1)
337+
; PWR7-BE-NEXT: lfiwzx 0, 0, 3
345338
; PWR7-BE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
339+
; PWR7-BE-NEXT: xxlxor 36, 36, 36
346340
; PWR7-BE-NEXT: addi 3, 3, .LCPI8_0@toc@l
347-
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
348-
; PWR7-BE-NEXT: addi 3, 1, -32
349341
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
350-
; PWR7-BE-NEXT: vperm 2, 3, 4, 2
342+
; PWR7-BE-NEXT: xxspltw 34, 0, 1
343+
; PWR7-BE-NEXT: vperm 2, 2, 4, 3
351344
; PWR7-BE-NEXT: blr
352345
;
353346
; PWR8-BE-LABEL: build_v4i32_load_0:
@@ -365,20 +358,17 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
365358
; PWR7-LE-LABEL: build_v4i32_load_0:
366359
; PWR7-LE: # %bb.0: # %entry
367360
; PWR7-LE-NEXT: li 4, 0
368-
; PWR7-LE-NEXT: lwz 3, 0(3)
369361
; PWR7-LE-NEXT: stw 4, -16(1)
370362
; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
363+
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
364+
; PWR7-LE-NEXT: addi 3, 1, -16
371365
; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
372-
; PWR7-LE-NEXT: stw 3, -32(1)
373-
; PWR7-LE-NEXT: addi 3, 1, -32
374-
; PWR7-LE-NEXT: lxvd2x 0, 0, 4
375-
; PWR7-LE-NEXT: addi 4, 1, -16
376366
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
377-
; PWR7-LE-NEXT: xxswapd 34, 0
367+
; PWR7-LE-NEXT: xxspltw 35, 0, 1
378368
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
379-
; PWR7-LE-NEXT: xxswapd 35, 1
369+
; PWR7-LE-NEXT: xxswapd 34, 1
380370
; PWR7-LE-NEXT: xxswapd 36, 0
381-
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
371+
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
382372
; PWR7-LE-NEXT: blr
383373
;
384374
; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -400,17 +390,13 @@ entry:
400390
define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
401391
; PWR7-BE-LABEL: build_v4i32_load_1:
402392
; PWR7-BE: # %bb.0: # %entry
403-
; PWR7-BE-NEXT: lwz 3, 0(3)
404-
; PWR7-BE-NEXT: xxlxor 36, 36, 36
405-
; PWR7-BE-NEXT: sldi 3, 3, 32
406-
; PWR7-BE-NEXT: std 3, -16(1)
407-
; PWR7-BE-NEXT: std 3, -8(1)
393+
; PWR7-BE-NEXT: lfiwzx 0, 0, 3
408394
; PWR7-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
395+
; PWR7-BE-NEXT: xxlxor 36, 36, 36
409396
; PWR7-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l
410-
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
411-
; PWR7-BE-NEXT: addi 3, 1, -16
412397
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
413-
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
398+
; PWR7-BE-NEXT: xxspltw 34, 0, 1
399+
; PWR7-BE-NEXT: vperm 2, 4, 2, 3
414400
; PWR7-BE-NEXT: blr
415401
;
416402
; PWR8-BE-LABEL: build_v4i32_load_1:
@@ -427,20 +413,17 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
427413
; PWR7-LE-LABEL: build_v4i32_load_1:
428414
; PWR7-LE: # %bb.0: # %entry
429415
; PWR7-LE-NEXT: li 4, 0
430-
; PWR7-LE-NEXT: lwz 3, 0(3)
431-
; PWR7-LE-NEXT: stw 4, -32(1)
416+
; PWR7-LE-NEXT: stw 4, -16(1)
432417
; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
433-
; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
434-
; PWR7-LE-NEXT: stw 3, -16(1)
418+
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
435419
; PWR7-LE-NEXT: addi 3, 1, -16
436-
; PWR7-LE-NEXT: lxvd2x 0, 0, 4
437-
; PWR7-LE-NEXT: addi 4, 1, -32
420+
; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
438421
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
439-
; PWR7-LE-NEXT: xxswapd 34, 0
422+
; PWR7-LE-NEXT: xxspltw 35, 0, 1
440423
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
441-
; PWR7-LE-NEXT: xxswapd 35, 1
424+
; PWR7-LE-NEXT: xxswapd 34, 1
442425
; PWR7-LE-NEXT: xxswapd 36, 0
443-
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
426+
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
444427
; PWR7-LE-NEXT: blr
445428
;
446429
; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -463,17 +446,13 @@ entry:
463446
define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
464447
; PWR7-BE-LABEL: build_v4i32_load_2:
465448
; PWR7-BE: # %bb.0: # %entry
466-
; PWR7-BE-NEXT: lwz 3, 0(3)
467-
; PWR7-BE-NEXT: xxlxor 36, 36, 36
468-
; PWR7-BE-NEXT: sldi 3, 3, 32
469-
; PWR7-BE-NEXT: std 3, -16(1)
470-
; PWR7-BE-NEXT: std 3, -8(1)
449+
; PWR7-BE-NEXT: lfiwzx 0, 0, 3
471450
; PWR7-BE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
451+
; PWR7-BE-NEXT: xxlxor 36, 36, 36
472452
; PWR7-BE-NEXT: addi 3, 3, .LCPI10_0@toc@l
473-
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
474-
; PWR7-BE-NEXT: addi 3, 1, -16
475453
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
476-
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
454+
; PWR7-BE-NEXT: xxspltw 34, 0, 1
455+
; PWR7-BE-NEXT: vperm 2, 4, 2, 3
477456
; PWR7-BE-NEXT: blr
478457
;
479458
; PWR8-BE-LABEL: build_v4i32_load_2:
@@ -491,20 +470,17 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
491470
; PWR7-LE-LABEL: build_v4i32_load_2:
492471
; PWR7-LE: # %bb.0: # %entry
493472
; PWR7-LE-NEXT: li 4, 0
494-
; PWR7-LE-NEXT: lwz 3, 0(3)
495-
; PWR7-LE-NEXT: stw 4, -32(1)
473+
; PWR7-LE-NEXT: stw 4, -16(1)
496474
; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
497-
; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
498-
; PWR7-LE-NEXT: stw 3, -16(1)
475+
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
499476
; PWR7-LE-NEXT: addi 3, 1, -16
500-
; PWR7-LE-NEXT: lxvd2x 0, 0, 4
501-
; PWR7-LE-NEXT: addi 4, 1, -32
477+
; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
502478
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
503-
; PWR7-LE-NEXT: xxswapd 34, 0
479+
; PWR7-LE-NEXT: xxspltw 35, 0, 1
504480
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
505-
; PWR7-LE-NEXT: xxswapd 35, 1
481+
; PWR7-LE-NEXT: xxswapd 34, 1
506482
; PWR7-LE-NEXT: xxswapd 36, 0
507-
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
483+
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
508484
; PWR7-LE-NEXT: blr
509485
;
510486
; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -526,17 +502,13 @@ entry:
526502
define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
527503
; PWR7-BE-LABEL: build_v4i32_load_3:
528504
; PWR7-BE: # %bb.0: # %entry
529-
; PWR7-BE-NEXT: lwz 3, 0(3)
530-
; PWR7-BE-NEXT: xxlxor 36, 36, 36
531-
; PWR7-BE-NEXT: sldi 3, 3, 32
532-
; PWR7-BE-NEXT: std 3, -16(1)
533-
; PWR7-BE-NEXT: std 3, -8(1)
505+
; PWR7-BE-NEXT: lfiwzx 0, 0, 3
534506
; PWR7-BE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
507+
; PWR7-BE-NEXT: xxlxor 36, 36, 36
535508
; PWR7-BE-NEXT: addi 3, 3, .LCPI11_0@toc@l
536-
; PWR7-BE-NEXT: lxvw4x 34, 0, 3
537-
; PWR7-BE-NEXT: addi 3, 1, -16
538509
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
539-
; PWR7-BE-NEXT: vperm 2, 4, 3, 2
510+
; PWR7-BE-NEXT: xxspltw 34, 0, 1
511+
; PWR7-BE-NEXT: vperm 2, 4, 2, 3
540512
; PWR7-BE-NEXT: blr
541513
;
542514
; PWR8-BE-LABEL: build_v4i32_load_3:
@@ -553,20 +525,17 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
553525
; PWR7-LE-LABEL: build_v4i32_load_3:
554526
; PWR7-LE: # %bb.0: # %entry
555527
; PWR7-LE-NEXT: li 4, 0
556-
; PWR7-LE-NEXT: lwz 3, 0(3)
557-
; PWR7-LE-NEXT: stw 4, -32(1)
528+
; PWR7-LE-NEXT: stw 4, -16(1)
558529
; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
559-
; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
560-
; PWR7-LE-NEXT: stw 3, -16(1)
530+
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
561531
; PWR7-LE-NEXT: addi 3, 1, -16
562-
; PWR7-LE-NEXT: lxvd2x 0, 0, 4
563-
; PWR7-LE-NEXT: addi 4, 1, -32
532+
; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
564533
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
565-
; PWR7-LE-NEXT: xxswapd 34, 0
534+
; PWR7-LE-NEXT: xxspltw 35, 0, 1
566535
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
567-
; PWR7-LE-NEXT: xxswapd 35, 1
536+
; PWR7-LE-NEXT: xxswapd 34, 1
568537
; PWR7-LE-NEXT: xxswapd 36, 0
569-
; PWR7-LE-NEXT: vperm 2, 4, 3, 2
538+
; PWR7-LE-NEXT: vperm 2, 3, 4, 2
570539
; PWR7-LE-NEXT: blr
571540
;
572541
; PWR8-LE-LABEL: build_v4i32_load_3:

llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -536,15 +536,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
536536
;
537537
; P8-AIX-32-LABEL: testmrglb3:
538538
; P8-AIX-32: # %bb.0: # %entry
539-
; P8-AIX-32-NEXT: lwz r4, 4(r3)
539+
; P8-AIX-32-NEXT: li r4, 4
540+
; P8-AIX-32-NEXT: lfiwzx f1, 0, r3
540541
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
541-
; P8-AIX-32-NEXT: stw r4, -16(r1)
542-
; P8-AIX-32-NEXT: lwz r3, 0(r3)
543-
; P8-AIX-32-NEXT: stw r3, -32(r1)
544-
; P8-AIX-32-NEXT: addi r3, r1, -16
545-
; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
546-
; P8-AIX-32-NEXT: addi r3, r1, -32
547-
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
542+
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
543+
; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1
544+
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
548545
; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
549546
; P8-AIX-32-NEXT: vmrghb v2, v3, v2
550547
; P8-AIX-32-NEXT: blr
@@ -852,17 +849,15 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
852849
;
853850
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
854851
; P8-AIX-32: # %bb.0: # %entry
852+
; P8-AIX-32-NEXT: li r5, 0
855853
; P8-AIX-32-NEXT: slwi r4, r4, 2
856854
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
857-
; P8-AIX-32-NEXT: lwzx r3, r3, r4
858-
; P8-AIX-32-NEXT: li r4, 0
859-
; P8-AIX-32-NEXT: stw r4, -32(r1)
860-
; P8-AIX-32-NEXT: stw r3, -16(r1)
861-
; P8-AIX-32-NEXT: addi r3, r1, -32
862-
; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
855+
; P8-AIX-32-NEXT: stw r5, -16(r1)
856+
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
863857
; P8-AIX-32-NEXT: addi r3, r1, -16
864858
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
865-
; P8-AIX-32-NEXT: xxmrghw v2, vs0, vs1
859+
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
860+
; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
866861
; P8-AIX-32-NEXT: vmrghb v2, v2, v3
867862
; P8-AIX-32-NEXT: blr
868863
entry:
@@ -1026,14 +1021,11 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
10261021
;
10271022
; P8-AIX-32-LABEL: testSplat8:
10281023
; P8-AIX-32: # %bb.0: # %entry
1029-
; P8-AIX-32-NEXT: lwz r4, 4(r3)
1030-
; P8-AIX-32-NEXT: stw r4, -16(r1)
1031-
; P8-AIX-32-NEXT: lwz r3, 0(r3)
1032-
; P8-AIX-32-NEXT: stw r3, -32(r1)
1033-
; P8-AIX-32-NEXT: addi r3, r1, -16
1034-
; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
1035-
; P8-AIX-32-NEXT: addi r3, r1, -32
1036-
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
1024+
; P8-AIX-32-NEXT: li r4, 4
1025+
; P8-AIX-32-NEXT: lfiwzx f1, 0, r3
1026+
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
1027+
; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1
1028+
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
10371029
; P8-AIX-32-NEXT: xxmrghw vs0, vs1, vs0
10381030
; P8-AIX-32-NEXT: xxmrghd v2, vs0, vs0
10391031
; P8-AIX-32-NEXT: blr
@@ -1081,17 +1073,14 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
10811073
;
10821074
; P8-AIX-32-LABEL: testSplati64_0:
10831075
; P8-AIX-32: # %bb.0: # %entry
1084-
; P8-AIX-32-NEXT: lwz r4, 0(r3)
1085-
; P8-AIX-32-NEXT: lwz r3, 4(r3)
1086-
; P8-AIX-32-NEXT: stw r3, -16(r1)
1076+
; P8-AIX-32-NEXT: li r4, 4
1077+
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
1078+
; P8-AIX-32-NEXT: xxspltw v2, vs0, 1
1079+
; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
10871080
; P8-AIX-32-NEXT: lwz r3, L..C3(r2) # %const.0
1088-
; P8-AIX-32-NEXT: stw r4, -32(r1)
1089-
; P8-AIX-32-NEXT: lxvw4x v2, 0, r3
1090-
; P8-AIX-32-NEXT: addi r3, r1, -16
1091-
; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
1092-
; P8-AIX-32-NEXT: addi r3, r1, -32
10931081
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
1094-
; P8-AIX-32-NEXT: vperm v2, v4, v3, v2
1082+
; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
1083+
; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
10951084
; P8-AIX-32-NEXT: blr
10961085
entry:
10971086
%0 = load <1 x i64>, ptr %ptr, align 8

0 commit comments

Comments
 (0)