Skip to content

Commit 477b650

Browse files
author
QingShan Zhang
committed
[PowerPC] Select the D-Form load if we know its offset meets the requirement
LD/STD-like instructions are selected only when the alignment of the load/store is >= 4, to deal with the case where the offset might not be known (e.g. relocations). That means we have to select the X-Form load for `%0 = load i64, i64* %arrayidx, align 2`. In fact, we can still select the D-Form load if the offset is known. So we only query the load/store alignment when we don't know whether the offset is a multiple of 4. Reviewed By: jji, Nemanjai. Differential Revision: https://reviews.llvm.org/D93099
1 parent 4b38885 commit 477b650

File tree

9 files changed

+44
-42
lines changed

9 files changed

+44
-42
lines changed

llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,7 +1062,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
10621062
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
10631063
"lwa $rD, $src", IIC_LdStLWA,
10641064
[(set i64:$rD,
1065-
(aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
1065+
(DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
10661066
PPC970_DGroup_Cracked;
10671067
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
10681068
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1173,7 +1173,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
11731173
let PPC970_Unit = 2 in {
11741174
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
11751175
"ld $rD, $src", IIC_LdStLD,
1176-
[(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
1176+
[(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
11771177
// The following four definitions are selected for small code model only.
11781178
// Otherwise, we need to create two instructions to form a 32-bit offset,
11791179
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1380,7 +1380,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
13801380
// Normal 8-byte stores.
13811381
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
13821382
"std $rS, $dst", IIC_LdStSTD,
1383-
[(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
1383+
[(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
13841384
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
13851385
"stdx $rS, $dst", IIC_LdStSTD,
13861386
[(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
@@ -1447,7 +1447,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
14471447
(STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
14481448
def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
14491449
(STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
1450-
def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
1450+
def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
14511451
(STDU $rS, iaddroff:$ptroff, $ptrreg)>;
14521452

14531453
def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
@@ -1591,11 +1591,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
15911591

15921592
// Patterns to match r+r indexed loads and stores for
15931593
// addresses without at least 4-byte alignment.
1594-
def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
1594+
def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
15951595
(LWAX xoaddr:$src)>;
1596-
def : Pat<(i64 (unaligned4load xoaddr:$src)),
1596+
def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
15971597
(LDX xoaddr:$src)>;
1598-
def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
1598+
def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
15991599
(STDX $rS, xoaddr:$dst)>;
16001600

16011601
// 64-bits atomic loads and stores

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -495,37 +495,41 @@ def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
495495
return isUInt<32>(Imm);
496496
}]>;
497497

498-
// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
498+
// This is a somewhat weaker condition than actually checking for 4-byte
499+
// alignment. It is simply checking that the displacement can be represented
500+
// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form
501+
// instructions).
502+
// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
499503
// restricted memrix (4-aligned) constants are alignment sensitive. If these
500504
// offsets are hidden behind TOC entries then the values of the lower-order
501505
// bits cannot be checked directly. As a result, we need to also incorporate
502506
// an alignment check into the relevant patterns.
503507

504-
def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
505-
return cast<LoadSDNode>(N)->getAlignment() >= 4;
508+
def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
509+
return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
506510
}]>;
507-
def aligned4store : PatFrag<(ops node:$val, node:$ptr),
511+
def DSFormStore : PatFrag<(ops node:$val, node:$ptr),
508512
(store node:$val, node:$ptr), [{
509-
return cast<StoreSDNode>(N)->getAlignment() >= 4;
513+
return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
510514
}]>;
511-
def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
512-
return cast<LoadSDNode>(N)->getAlignment() >= 4;
515+
def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
516+
return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
513517
}]>;
514-
def aligned4pre_store : PatFrag<
518+
def DSFormPreStore : PatFrag<
515519
(ops node:$val, node:$base, node:$offset),
516520
(pre_store node:$val, node:$base, node:$offset), [{
517-
return cast<StoreSDNode>(N)->getAlignment() >= 4;
521+
return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
518522
}]>;
519523

520-
def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
521-
return cast<LoadSDNode>(N)->getAlignment() < 4;
524+
def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
525+
return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
522526
}]>;
523-
def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
527+
def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr),
524528
(store node:$val, node:$ptr), [{
525-
return cast<StoreSDNode>(N)->getAlignment() < 4;
529+
return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
526530
}]>;
527-
def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
528-
return cast<LoadSDNode>(N)->getAlignment() < 4;
531+
def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
532+
return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
529533
}]>;
530534

531535
// This is a somewhat weaker condition than actually checking for 16-byte

llvm/test/CodeGen/PowerPC/ldst-align.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ define i64 @load(i64* %p) {
66
; CHECK: bb.0.entry:
77
; CHECK: liveins: $x3
88
; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
9-
; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 24
10-
; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX $zero8, killed [[ADDI8_]] :: (load 8 from %ir.arrayidx, align 2)
11-
; CHECK: $x3 = COPY [[LDX]]
9+
; CHECK: [[LD:%[0-9]+]]:g8rc = LD 24, [[COPY]] :: (load 8 from %ir.arrayidx, align 2)
10+
; CHECK: $x3 = COPY [[LD]]
1211
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3
1312
entry:
1413
%arrayidx = getelementptr inbounds i64, i64* %p, i64 3
@@ -21,9 +20,8 @@ define void @store(i64* %p) {
2120
; CHECK: bb.0.entry:
2221
; CHECK: liveins: $x3
2322
; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
24-
; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 16
2523
; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 9
26-
; CHECK: STDX killed [[LI8_]], $zero8, killed [[ADDI8_]] :: (store 8 into %ir.arrayidx, align 1)
24+
; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, align 1)
2725
; CHECK: BLR8 implicit $lr8, implicit $rm
2826
entry:
2927
%arrayidx = getelementptr inbounds i64, i64* %p, i64 2

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
3535
define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
3636
; CHECK-LABEL: zeroEqualityTest01:
3737
; CHECK: # %bb.0:
38-
; CHECK-NEXT: ldx 5, 0, 3
39-
; CHECK-NEXT: ldx 6, 0, 4
38+
; CHECK-NEXT: ld 5, 0(3)
39+
; CHECK-NEXT: ld 6, 0(4)
4040
; CHECK-NEXT: cmpld 5, 6
4141
; CHECK-NEXT: bne 0, .LBB1_2
4242
; CHECK-NEXT: # %bb.1: # %loadbb1
@@ -125,7 +125,7 @@ define signext i32 @equalityFoldTwoConstants() {
125125
define signext i32 @equalityFoldOneConstant(i8* %X) {
126126
; CHECK-LABEL: equalityFoldOneConstant:
127127
; CHECK: # %bb.0:
128-
; CHECK-NEXT: ldx 4, 0, 3
128+
; CHECK-NEXT: ld 4, 0(3)
129129
; CHECK-NEXT: li 5, 1
130130
; CHECK-NEXT: sldi 5, 5, 32
131131
; CHECK-NEXT: cmpld 4, 5

llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
define zeroext i1 @opeq1(
99
; PPC64LE-LABEL: opeq1:
1010
; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
11-
; PPC64LE-NEXT: ldx 3, 0, 3
12-
; PPC64LE-NEXT: ldx 4, 0, 4
11+
; PPC64LE-NEXT: ld 3, 0(3)
12+
; PPC64LE-NEXT: ld 4, 0(4)
1313
; PPC64LE-NEXT: xor 3, 3, 4
1414
; PPC64LE-NEXT: cntlzd 3, 3
1515
; PPC64LE-NEXT: rldicl 3, 3, 58, 63

llvm/test/CodeGen/PowerPC/pr45186.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
define i64 @e(i8* nocapture readonly %f) local_unnamed_addr #0 {
1010
; CHECK-LABEL: e:
1111
; CHECK: # %bb.0: # %entry
12-
; CHECK-NEXT: ldx r3, 0, r3
12+
; CHECK-NEXT: ld r3, 0(r3)
1313
; CHECK-NEXT: blr
1414
entry:
1515
%0 = load i8, i8* %f, align 1

llvm/test/CodeGen/PowerPC/store-combine.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ entry:
8080
define void @store_i64_by_i8(i64 %m, i8* %p) {
8181
; CHECK-PPC64LE-LABEL: store_i64_by_i8:
8282
; CHECK-PPC64LE: # %bb.0: # %entry
83-
; CHECK-PPC64LE-NEXT: stdx 3, 0, 4
83+
; CHECK-PPC64LE-NEXT: std 3, 0(4)
8484
; CHECK-PPC64LE-NEXT: blr
8585
;
8686
; CHECK-PPC64-LABEL: store_i64_by_i8:
@@ -138,7 +138,7 @@ define void @store_i64_by_i8_bswap(i64 %m, i8* %p) {
138138
;
139139
; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
140140
; CHECK-PPC64: # %bb.0: # %entry
141-
; CHECK-PPC64-NEXT: stdx 3, 0, 4
141+
; CHECK-PPC64-NEXT: std 3, 0(4)
142142
; CHECK-PPC64-NEXT: blr
143143
entry:
144144
%conv = trunc i64 %m to i8
@@ -198,7 +198,7 @@ define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) {
198198
; CHECK-PPC64-NEXT: slwi 5, 3, 3
199199
; CHECK-PPC64-NEXT: sub 3, 5, 3
200200
; CHECK-PPC64-NEXT: extsw 3, 3
201-
; CHECK-PPC64-NEXT: stdx 3, 0, 4
201+
; CHECK-PPC64-NEXT: std 3, 0(4)
202202
; CHECK-PPC64-NEXT: blr
203203
entry:
204204
%mul = mul nsw i32 %t, 7

llvm/test/CodeGen/PowerPC/unal4-std.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @copy_to_conceal(<8 x i16>* %inp) #0 {
1313
; CHECK-NEXT: ld 4, -8(1)
1414
; CHECK-NEXT: std 4, 8(3)
1515
; CHECK-NEXT: ld 4, -16(1)
16-
; CHECK-NEXT: stdx 4, 0, 3
16+
; CHECK-NEXT: std 4, 0(3)
1717
; CHECK-NEXT: blr
1818
;
1919
; CHECK-VSX-LABEL: copy_to_conceal:

llvm/test/CodeGen/PowerPC/unaligned.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@ entry:
4646
define void @foo3(i64* %p, i64* %r) nounwind {
4747
; CHECK-LABEL: foo3:
4848
; CHECK: # %bb.0: # %entry
49-
; CHECK-NEXT: ldx 3, 0, 3
50-
; CHECK-NEXT: stdx 3, 0, 4
49+
; CHECK-NEXT: ld 3, 0(3)
50+
; CHECK-NEXT: std 3, 0(4)
5151
; CHECK-NEXT: blr
5252
;
5353
; CHECK-VSX-LABEL: foo3:
5454
; CHECK-VSX: # %bb.0: # %entry
55-
; CHECK-VSX-NEXT: ldx 3, 0, 3
56-
; CHECK-VSX-NEXT: stdx 3, 0, 4
55+
; CHECK-VSX-NEXT: ld 3, 0(3)
56+
; CHECK-VSX-NEXT: std 3, 0(4)
5757
; CHECK-VSX-NEXT: blr
5858
entry:
5959
%v = load i64, i64* %p, align 1
@@ -118,7 +118,7 @@ define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
118118
; CHECK-NEXT: ld 3, -8(1)
119119
; CHECK-NEXT: std 3, 8(4)
120120
; CHECK-NEXT: ld 3, -16(1)
121-
; CHECK-NEXT: stdx 3, 0, 4
121+
; CHECK-NEXT: std 3, 0(4)
122122
; CHECK-NEXT: blr
123123
;
124124
; CHECK-VSX-LABEL: foo6:

0 commit comments

Comments
 (0)