Skip to content

Commit 23b673e

Browse files
authored
[DAG][AArch64] Handle vscale addressing modes in reassociationCanBreakAddressingModePattern (#89908)
reassociationCanBreakAddressingModePattern tries to prevent bad add reassociations that would break addressing mode patterns. This adds support for vscale offset addressing modes, making sure we don't break patterns that already exist. It does not optimize _to_ the correct addressing modes yet, but prevents us from optimizing _away_ from them.
1 parent a76518c commit 23b673e

File tree

2 files changed

+58
-38
lines changed

2 files changed

+58
-38
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,44 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
10831083
// (load/store (add, (add, x, y), offset2)) ->
10841084
// (load/store (add, (add, x, offset2), y)).
10851085

1086-
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1086+
if (N0.getOpcode() != ISD::ADD)
1087+
return false;
1088+
1089+
// Check for vscale addressing modes.
1090+
// (load/store (add/sub (add x, y), vscale))
1091+
// (load/store (add/sub (add x, y), (lsl vscale, C)))
1092+
// (load/store (add/sub (add x, y), (mul vscale, C)))
1093+
if ((N1.getOpcode() == ISD::VSCALE ||
1094+
((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1095+
N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1096+
isa<ConstantSDNode>(N1.getOperand(1)))) &&
1097+
N1.getValueType().getFixedSizeInBits() <= 64) {
1098+
int64_t ScalableOffset =
1099+
N1.getOpcode() == ISD::VSCALE
1100+
? N1.getConstantOperandVal(0)
1101+
: (N1.getOperand(0).getConstantOperandVal(0) *
1102+
(N1.getOpcode() == ISD::SHL ? (1 << N1.getConstantOperandVal(1))
1103+
: N1.getConstantOperandVal(1)));
1104+
if (Opc == ISD::SUB)
1105+
ScalableOffset = -ScalableOffset;
1106+
if (all_of(N->uses(), [&](SDNode *Node) {
1107+
if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1108+
LoadStore && LoadStore->getBasePtr().getNode() == N) {
1109+
TargetLoweringBase::AddrMode AM;
1110+
AM.HasBaseReg = true;
1111+
AM.ScalableOffset = ScalableOffset;
1112+
EVT VT = LoadStore->getMemoryVT();
1113+
unsigned AS = LoadStore->getAddressSpace();
1114+
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1115+
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1116+
AS);
1117+
}
1118+
return false;
1119+
}))
1120+
return true;
1121+
}
1122+
1123+
if (Opc != ISD::ADD)
10871124
return false;
10881125

10891126
auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -3971,7 +4008,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
39714008

39724009
// Hoist one-use addition by non-opaque constant:
39734010
// (x + C) - y -> (x - y) + C
3974-
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4011+
if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4012+
N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
39754013
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
39764014
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
39774015
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));

llvm/test/CodeGen/AArch64/sve-reassocadd.ll

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,9 @@ entry:
2222
define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
2323
; CHECK-LABEL: i8_4s_1v:
2424
; CHECK: // %bb.0: // %entry
25-
; CHECK-NEXT: rdvl x8, #1
2625
; CHECK-NEXT: ptrue p0.b
27-
; CHECK-NEXT: mov w9, #4 // =0x4
28-
; CHECK-NEXT: add x8, x0, x8
29-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
26+
; CHECK-NEXT: add x8, x0, #4
27+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, #1, mul vl]
3028
; CHECK-NEXT: ret
3129
entry:
3230
%add.ptr = getelementptr inbounds i8, ptr %b, i64 4
@@ -58,11 +56,9 @@ entry:
5856
define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
5957
; CHECK-LABEL: i16_8s_1v:
6058
; CHECK: // %bb.0: // %entry
61-
; CHECK-NEXT: rdvl x8, #1
6259
; CHECK-NEXT: ptrue p0.h
63-
; CHECK-NEXT: mov x9, #4 // =0x4
64-
; CHECK-NEXT: add x8, x0, x8
65-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
60+
; CHECK-NEXT: add x8, x0, #8
61+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #1, mul vl]
6662
; CHECK-NEXT: ret
6763
entry:
6864
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
@@ -94,11 +90,9 @@ entry:
9490
define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
9591
; CHECK-LABEL: i16_8s_2v:
9692
; CHECK: // %bb.0: // %entry
97-
; CHECK-NEXT: rdvl x8, #2
9893
; CHECK-NEXT: ptrue p0.h
99-
; CHECK-NEXT: mov x9, #4 // =0x4
100-
; CHECK-NEXT: add x8, x0, x8
101-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
94+
; CHECK-NEXT: add x8, x0, #8
95+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #2, mul vl]
10296
; CHECK-NEXT: ret
10397
entry:
10498
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
@@ -130,11 +124,9 @@ entry:
130124
define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
131125
; CHECK-LABEL: i32_16s_2v:
132126
; CHECK: // %bb.0: // %entry
133-
; CHECK-NEXT: rdvl x8, #1
134127
; CHECK-NEXT: ptrue p0.s
135-
; CHECK-NEXT: mov x9, #4 // =0x4
136-
; CHECK-NEXT: add x8, x0, x8
137-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
128+
; CHECK-NEXT: add x8, x0, #16
129+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, #1, mul vl]
138130
; CHECK-NEXT: ret
139131
entry:
140132
%add.ptr = getelementptr inbounds i8, ptr %b, i64 16
@@ -166,11 +158,9 @@ entry:
166158
define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
167159
; CHECK-LABEL: i64_32s_2v:
168160
; CHECK: // %bb.0: // %entry
169-
; CHECK-NEXT: rdvl x8, #1
170161
; CHECK-NEXT: ptrue p0.d
171-
; CHECK-NEXT: mov x9, #4 // =0x4
172-
; CHECK-NEXT: add x8, x0, x8
173-
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
162+
; CHECK-NEXT: add x8, x0, #32
163+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl]
174164
; CHECK-NEXT: ret
175165
entry:
176166
%add.ptr = getelementptr inbounds i8, ptr %b, i64 32
@@ -203,11 +193,9 @@ entry:
203193
define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
204194
; CHECK-LABEL: i8_4s_m2v:
205195
; CHECK: // %bb.0: // %entry
206-
; CHECK-NEXT: cnth x8, all, mul #4
207196
; CHECK-NEXT: ptrue p0.b
208-
; CHECK-NEXT: mov w9, #4 // =0x4
209-
; CHECK-NEXT: sub x8, x0, x8
210-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
197+
; CHECK-NEXT: add x8, x0, #4
198+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
211199
; CHECK-NEXT: ret
212200
entry:
213201
%add.ptr = getelementptr inbounds i8, ptr %b, i64 4
@@ -239,11 +227,9 @@ entry:
239227
define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
240228
; CHECK-LABEL: i16_8s_m2v:
241229
; CHECK: // %bb.0: // %entry
242-
; CHECK-NEXT: cnth x8, all, mul #4
243230
; CHECK-NEXT: ptrue p0.h
244-
; CHECK-NEXT: mov x9, #4 // =0x4
245-
; CHECK-NEXT: sub x8, x0, x8
246-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
231+
; CHECK-NEXT: add x8, x0, #8
232+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
247233
; CHECK-NEXT: ret
248234
entry:
249235
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
@@ -275,11 +261,9 @@ entry:
275261
define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
276262
; CHECK-LABEL: i32_16s_m2v:
277263
; CHECK: // %bb.0: // %entry
278-
; CHECK-NEXT: cnth x8, all, mul #4
279264
; CHECK-NEXT: ptrue p0.s
280-
; CHECK-NEXT: mov x9, #4 // =0x4
281-
; CHECK-NEXT: sub x8, x0, x8
282-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
265+
; CHECK-NEXT: add x8, x0, #16
266+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
283267
; CHECK-NEXT: ret
284268
entry:
285269
%add.ptr = getelementptr inbounds i8, ptr %b, i64 16
@@ -311,11 +295,9 @@ entry:
311295
define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
312296
; CHECK-LABEL: i64_32s_m2v:
313297
; CHECK: // %bb.0: // %entry
314-
; CHECK-NEXT: cnth x8, all, mul #4
315298
; CHECK-NEXT: ptrue p0.d
316-
; CHECK-NEXT: mov x9, #4 // =0x4
317-
; CHECK-NEXT: sub x8, x0, x8
318-
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
299+
; CHECK-NEXT: add x8, x0, #32
300+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
319301
; CHECK-NEXT: ret
320302
entry:
321303
%add.ptr = getelementptr inbounds i8, ptr %b, i64 32

0 commit comments

Comments
 (0)