Skip to content

Commit ad140d3

Browse files
committed
[DAG][AArch64] Handle vscale addressing modes in reassociationCanBreakAddressingModePattern.
reassociationCanBreakAddressingModePattern tries to prevent bad add reassociations that would break addressing mode patterns. This adds support for vscale offset addressing modes, making sure we don't break patterns that already exist. It does not optimize _to_ the correct addressing modes yet, but prevents us from optimizing _away_ from them.
1 parent 333aad7 commit ad140d3

File tree

2 files changed

+41
-20
lines changed

2 files changed

+41
-20
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,6 +1085,37 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
10851085
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
10861086
return false;
10871087

1088+
// Check for vscale addressing modes.
1089+
// (load/store (add (add x, y), vscale))
1090+
// (load/store (add (add x, y), (lsl vscale, C)))
1091+
// (load/store (add (add x, y), (mul vscale, C)))
1092+
if ((N1.getOpcode() == ISD::VSCALE ||
1093+
((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1094+
N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1095+
isa<ConstantSDNode>(N1.getOperand(1)))) &&
1096+
N1.getValueSizeInBits() <= 64) {
1097+
unsigned ScalableOffset =
1098+
N1.getOpcode() == ISD::VSCALE
1099+
? N1.getConstantOperandVal(0)
1100+
: (N1.getOperand(0).getConstantOperandVal(0) *
1101+
(N1.getOpcode() == ISD::SHL ? (1 << N1.getConstantOperandVal(1))
1102+
: N1.getConstantOperandVal(1)));
1103+
if (all_of(N->uses(), [&](SDNode *Node) {
1104+
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1105+
TargetLoweringBase::AddrMode AM;
1106+
AM.HasBaseReg = true;
1107+
AM.ScalableOffset = ScalableOffset;
1108+
EVT VT = LoadStore->getMemoryVT();
1109+
unsigned AS = LoadStore->getAddressSpace();
1110+
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1111+
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1112+
AS);
1113+
}
1114+
return false;
1115+
}))
1116+
return true;
1117+
}
1118+
10881119
auto *C2 = dyn_cast<ConstantSDNode>(N1);
10891120
if (!C2)
10901121
return false;

llvm/test/CodeGen/AArch64/sve-reassocadd.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,9 @@ entry:
2222
define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
2323
; CHECK-LABEL: i8_4s_1v:
2424
; CHECK: // %bb.0: // %entry
25-
; CHECK-NEXT: rdvl x8, #1
2625
; CHECK-NEXT: ptrue p0.b
27-
; CHECK-NEXT: mov w9, #4 // =0x4
28-
; CHECK-NEXT: add x8, x0, x8
29-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
26+
; CHECK-NEXT: add x8, x0, #4
27+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, #1, mul vl]
3028
; CHECK-NEXT: ret
3129
entry:
3230
%add.ptr = getelementptr inbounds i8, ptr %b, i64 4
@@ -58,11 +56,9 @@ entry:
5856
define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
5957
; CHECK-LABEL: i16_8s_1v:
6058
; CHECK: // %bb.0: // %entry
61-
; CHECK-NEXT: rdvl x8, #1
6259
; CHECK-NEXT: ptrue p0.h
63-
; CHECK-NEXT: mov x9, #4 // =0x4
64-
; CHECK-NEXT: add x8, x0, x8
65-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
60+
; CHECK-NEXT: add x8, x0, #8
61+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #1, mul vl]
6662
; CHECK-NEXT: ret
6763
entry:
6864
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
@@ -94,11 +90,9 @@ entry:
9490
define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
9591
; CHECK-LABEL: i16_8s_2v:
9692
; CHECK: // %bb.0: // %entry
97-
; CHECK-NEXT: rdvl x8, #2
9893
; CHECK-NEXT: ptrue p0.h
99-
; CHECK-NEXT: mov x9, #4 // =0x4
100-
; CHECK-NEXT: add x8, x0, x8
101-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
94+
; CHECK-NEXT: add x8, x0, #8
95+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #2, mul vl]
10296
; CHECK-NEXT: ret
10397
entry:
10498
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
@@ -130,11 +124,9 @@ entry:
130124
define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
131125
; CHECK-LABEL: i32_16s_2v:
132126
; CHECK: // %bb.0: // %entry
133-
; CHECK-NEXT: rdvl x8, #1
134127
; CHECK-NEXT: ptrue p0.s
135-
; CHECK-NEXT: mov x9, #4 // =0x4
136-
; CHECK-NEXT: add x8, x0, x8
137-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
128+
; CHECK-NEXT: add x8, x0, #16
129+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, #1, mul vl]
138130
; CHECK-NEXT: ret
139131
entry:
140132
%add.ptr = getelementptr inbounds i8, ptr %b, i64 16
@@ -166,11 +158,9 @@ entry:
166158
define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
167159
; CHECK-LABEL: i64_32s_2v:
168160
; CHECK: // %bb.0: // %entry
169-
; CHECK-NEXT: rdvl x8, #1
170161
; CHECK-NEXT: ptrue p0.d
171-
; CHECK-NEXT: mov x9, #4 // =0x4
172-
; CHECK-NEXT: add x8, x0, x8
173-
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
162+
; CHECK-NEXT: add x8, x0, #32
163+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl]
174164
; CHECK-NEXT: ret
175165
entry:
176166
%add.ptr = getelementptr inbounds i8, ptr %b, i64 32

0 commit comments

Comments
 (0)