Skip to content

Commit 215e61c

Browse files
authored
[AMDGPU][SDAG] Add ISD::PTRADD DAG combines (#142739)
This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one that is closely connected.

The generic DAG combine is based on a part of PR #105669 by rgwott, which was adapted from work by jrtc27, arichardson, davidchisnall in the CHERI/Morello LLVM tree. I added some parts and removed several disjuncts from the reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since they cause regressions in AMDGPU.

For SWDEV-516125.
1 parent 28aa871 commit 215e61c

File tree

6 files changed

+286
-196
lines changed

6 files changed

+286
-196
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ namespace {
421421
SDValue visitADDLike(SDNode *N);
422422
SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
423423
SDNode *LocReference);
424+
SDValue visitPTRADD(SDNode *N);
424425
SDValue visitSUB(SDNode *N);
425426
SDValue visitADDSAT(SDNode *N);
426427
SDValue visitSUBSAT(SDNode *N);
@@ -1140,7 +1141,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
11401141
return true;
11411142
}
11421143

1143-
if (Opc != ISD::ADD)
1144+
if (Opc != ISD::ADD && Opc != ISD::PTRADD)
11441145
return false;
11451146

11461147
auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -1894,6 +1895,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
18941895
case ISD::TokenFactor: return visitTokenFactor(N);
18951896
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
18961897
case ISD::ADD: return visitADD(N);
1898+
case ISD::PTRADD: return visitPTRADD(N);
18971899
case ISD::SUB: return visitSUB(N);
18981900
case ISD::SADDSAT:
18991901
case ISD::UADDSAT: return visitADDSAT(N);
@@ -2664,6 +2666,86 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
26642666
return SDValue();
26652667
}
26662668

2669+
/// Try to fold a pointer arithmetic node.
2670+
/// This needs to be done separately from normal addition, because pointer
2671+
/// addition is not commutative.
2672+
SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2673+
SDValue N0 = N->getOperand(0);
2674+
SDValue N1 = N->getOperand(1);
2675+
EVT PtrVT = N0.getValueType();
2676+
EVT IntVT = N1.getValueType();
2677+
SDLoc DL(N);
2678+
2679+
// This is already ensured by an assert in SelectionDAG::getNode(). Several
2680+
// combines here depend on this assumption.
2681+
assert(PtrVT == IntVT &&
2682+
"PTRADD with different operand types is not supported");
2683+
2684+
// fold (ptradd x, 0) -> x
2685+
if (isNullConstant(N1))
2686+
return N0;
2687+
2688+
// fold (ptradd 0, x) -> x
2689+
if (PtrVT == IntVT && isNullConstant(N0))
2690+
return N1;
2691+
2692+
if (N0.getOpcode() != ISD::PTRADD ||
2693+
reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2694+
return SDValue();
2695+
2696+
SDValue X = N0.getOperand(0);
2697+
SDValue Y = N0.getOperand(1);
2698+
SDValue Z = N1;
2699+
bool N0OneUse = N0.hasOneUse();
2700+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2701+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2702+
2703+
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2704+
// * y is a constant and (ptradd x, y) has one use; or
2705+
// * y and z are both constants.
2706+
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2707+
// If both additions in the original were NUW, the new ones are as well.
2708+
SDNodeFlags Flags =
2709+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2710+
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2711+
AddToWorklist(Add.getNode());
2712+
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2713+
}
2714+
2715+
// TODO: There is another possible fold here that was proven useful.
2716+
// It would be this:
2717+
//
2718+
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2719+
// * (ptradd x, y) has one use; and
2720+
// * y is a constant; and
2721+
// * z is not a constant.
2722+
//
2723+
// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2724+
// opportunity to select more complex instructions such as SUBPT and
2725+
// MSUBPT. However, a hypothetical corner case has been found that we could
2726+
// not avoid. Consider this (pseudo-POSIX C):
2727+
//
2728+
// char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2729+
// char *p = mmap(LARGE_CONSTANT);
2730+
// char *q = foo(p, -LARGE_CONSTANT);
2731+
//
2732+
// Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2733+
// further + z takes it back to the start of the mapping, so valid,
2734+
// regardless of the address mmap gave back. However, if mmap gives you an
2735+
// address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2736+
// borrow from the high bits (with the subsequent + z carrying back into
2737+
// the high bits to give you a well-defined pointer) and thus trip
2738+
// FEAT_CPA's pointer corruption checks.
2739+
//
2740+
// We leave this fold as an opportunity for future work, addressing the
2741+
// corner case for FEAT_CPA, as well as reconciling the solution with the
2742+
// more general application of pointer arithmetic in other future targets.
2743+
// For now each architecture that wants this fold must implement it in the
2744+
// target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2745+
2746+
return SDValue();
2747+
}
2748+
26672749
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
26682750
/// a shift and add with a different constant.
26692751
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
@@ -15095,6 +15177,7 @@ SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
1509515177
default:
1509615178
break;
1509715179
case ISD::ADD:
15180+
case ISD::PTRADD:
1509815181
case ISD::SUB: {
1509915182
unsigned AlignShift = Log2(AL);
1510015183
SDValue LHS = N0.getOperand(0);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
945945
}
946946

947947
setTargetDAGCombine({ISD::ADD,
948+
ISD::PTRADD,
948949
ISD::UADDO_CARRY,
949950
ISD::SUB,
950951
ISD::USUBO_CARRY,
@@ -15084,6 +15085,49 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
1508415085
return SDValue();
1508515086
}
1508615087

15088+
/// AMDGPU-specific combine for ISD::PTRADD: splits a single-use
/// (add y, z) offset so that its constant part ends up on the outermost
/// ptradd.
SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Base = N->getOperand(0);
  SDValue Offset = N->getOperand(1);

  // Only (ptradd x, (add y, z)) where the add has no other user is handled.
  if (Offset.getOpcode() != ISD::ADD || !Offset.hasOneUse())
    return SDValue();

  // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant
  // and y is not.
  // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant
  // and z is not.
  // The goal is to move constant offsets to the outermost ptradd, to create
  // more opportunities to fold offsets into memory instructions.
  // Together with the generic combines in DAGCombiner.cpp, this also
  // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
  //
  // The transform lives here rather than in the generic DAGCombiner because it
  // can turn in-bounds pointer arithmetic out-of-bounds, which is problematic
  // for AArch64's CPA.
  SDValue Y = Offset.getOperand(0);
  SDValue Z = Offset.getOperand(1);
  bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
  bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);

  // Exactly one of the two add operands must be a constant.
  if (YIsConstant == ZIsConstant)
    return SDValue();

  SDValue VariablePart = YIsConstant ? Z : Y;
  SDValue ConstantPart = YIsConstant ? Y : Z;

  // NUW carries over only when both original additions had it.
  SDNodeFlags Flags =
      (N->getFlags() & Offset->getFlags()) & SDNodeFlags::NoUnsignedWrap;

  SDValue Inner = DAG.getMemBasePlusOffset(Base, VariablePart, DL, Flags);
  DCI.AddToWorklist(Inner.getNode());
  return DAG.getMemBasePlusOffset(Inner, ConstantPart, DL, Flags);
}
15130+
1508715131
SDValue SITargetLowering::performSubCombine(SDNode *N,
1508815132
DAGCombinerInfo &DCI) const {
1508915133
SelectionDAG &DAG = DCI.DAG;
@@ -15622,6 +15666,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1562215666
switch (N->getOpcode()) {
1562315667
case ISD::ADD:
1562415668
return performAddCombine(N, DCI);
15669+
case ISD::PTRADD:
15670+
return performPtrAddCombine(N, DCI);
1562515671
case ISD::SUB:
1562615672
return performSubCombine(N, DCI);
1562715673
case ISD::UADDO_CARRY:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
220220
DAGCombinerInfo &DCI) const;
221221

222222
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223+
SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223224
SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
224225
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
225226
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;

llvm/test/CodeGen/AArch64/cpa-selectiondag.ll

Lines changed: 46 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -119,23 +119,17 @@ define void @msubpt1(i32 %index, i32 %elem) {
119119
; CHECK-CPA-O0: // %bb.0: // %entry
120120
; CHECK-CPA-O0-NEXT: // implicit-def: $x8
121121
; CHECK-CPA-O0-NEXT: mov w8, w0
122-
; CHECK-CPA-O0-NEXT: sxtw x9, w8
123-
; CHECK-CPA-O0-NEXT: mov x8, xzr
124-
; CHECK-CPA-O0-NEXT: subs x8, x8, x9
125-
; CHECK-CPA-O0-NEXT: lsl x8, x8, #1
126-
; CHECK-CPA-O0-NEXT: subs x10, x8, x9
122+
; CHECK-CPA-O0-NEXT: sxtw x8, w8
123+
; CHECK-CPA-O0-NEXT: mov w9, #48 // =0x30
124+
; CHECK-CPA-O0-NEXT: // kill: def $x9 killed $w9
125+
; CHECK-CPA-O0-NEXT: mneg x8, x8, x9
126+
; CHECK-CPA-O0-NEXT: add x8, x8, #288
127127
; CHECK-CPA-O0-NEXT: adrp x9, array2
128128
; CHECK-CPA-O0-NEXT: add x9, x9, :lo12:array2
129-
; CHECK-CPA-O0-NEXT: mov w8, #288 // =0x120
130-
; CHECK-CPA-O0-NEXT: // kill: def $x8 killed $w8
131129
; CHECK-CPA-O0-NEXT: addpt x8, x9, x8
132-
; CHECK-CPA-O0-NEXT: addpt x8, x8, x10, lsl #4
133-
; CHECK-CPA-O0-NEXT: mov w10, #96 // =0x60
134-
; CHECK-CPA-O0-NEXT: // kill: def $x10 killed $w10
135-
; CHECK-CPA-O0-NEXT: addpt x10, x9, x10
136-
; CHECK-CPA-O0-NEXT: ldr q1, [x10, #16]
137-
; CHECK-CPA-O0-NEXT: ldr q2, [x10, #32]
138130
; CHECK-CPA-O0-NEXT: ldr q0, [x9, #96]
131+
; CHECK-CPA-O0-NEXT: ldr q1, [x9, #112]
132+
; CHECK-CPA-O0-NEXT: ldr q2, [x9, #128]
139133
; CHECK-CPA-O0-NEXT: str q2, [x8, #32]
140134
; CHECK-CPA-O0-NEXT: str q1, [x8, #16]
141135
; CHECK-CPA-O0-NEXT: str q0, [x8]
@@ -144,21 +138,17 @@ define void @msubpt1(i32 %index, i32 %elem) {
144138
; CHECK-CPA-O3-LABEL: msubpt1:
145139
; CHECK-CPA-O3: // %bb.0: // %entry
146140
; CHECK-CPA-O3-NEXT: // kill: def $w0 killed $w0 def $x0
147-
; CHECK-CPA-O3-NEXT: sxtw x9, w0
148-
; CHECK-CPA-O3-NEXT: adrp x8, array2
149-
; CHECK-CPA-O3-NEXT: add x8, x8, :lo12:array2
150-
; CHECK-CPA-O3-NEXT: mov w11, #96 // =0x60
151-
; CHECK-CPA-O3-NEXT: mov w12, #288 // =0x120
152-
; CHECK-CPA-O3-NEXT: ldr q2, [x8, #96]
153-
; CHECK-CPA-O3-NEXT: neg x10, x9
154-
; CHECK-CPA-O3-NEXT: addpt x11, x8, x11
155-
; CHECK-CPA-O3-NEXT: lsl x10, x10, #1
156-
; CHECK-CPA-O3-NEXT: ldp q1, q0, [x11, #16]
157-
; CHECK-CPA-O3-NEXT: sub x9, x10, x9
158-
; CHECK-CPA-O3-NEXT: addpt x10, x8, x12
159-
; CHECK-CPA-O3-NEXT: addpt x9, x10, x9, lsl #4
160-
; CHECK-CPA-O3-NEXT: stp q1, q0, [x9, #16]
161-
; CHECK-CPA-O3-NEXT: str q2, [x9]
141+
; CHECK-CPA-O3-NEXT: sxtw x8, w0
142+
; CHECK-CPA-O3-NEXT: mov w9, #48 // =0x30
143+
; CHECK-CPA-O3-NEXT: mneg x8, x8, x9
144+
; CHECK-CPA-O3-NEXT: adrp x9, array2
145+
; CHECK-CPA-O3-NEXT: add x9, x9, :lo12:array2
146+
; CHECK-CPA-O3-NEXT: ldp q1, q0, [x9, #112]
147+
; CHECK-CPA-O3-NEXT: ldr q2, [x9, #96]
148+
; CHECK-CPA-O3-NEXT: add x8, x8, #288
149+
; CHECK-CPA-O3-NEXT: addpt x8, x9, x8
150+
; CHECK-CPA-O3-NEXT: stp q1, q0, [x8, #16]
151+
; CHECK-CPA-O3-NEXT: str q2, [x8]
162152
; CHECK-CPA-O3-NEXT: ret
163153
;
164154
; CHECK-NOCPA-O0-LABEL: msubpt1:
@@ -205,29 +195,29 @@ entry:
205195
define void @subpt1(i32 %index, i32 %elem) {
206196
; CHECK-CPA-O0-LABEL: subpt1:
207197
; CHECK-CPA-O0: // %bb.0: // %entry
208-
; CHECK-CPA-O0-NEXT: adrp x9, array
209-
; CHECK-CPA-O0-NEXT: add x9, x9, :lo12:array
198+
; CHECK-CPA-O0-NEXT: // implicit-def: $x8
199+
; CHECK-CPA-O0-NEXT: mov w8, w0
200+
; CHECK-CPA-O0-NEXT: sxtw x9, w8
210201
; CHECK-CPA-O0-NEXT: mov w8, #96 // =0x60
211202
; CHECK-CPA-O0-NEXT: // kill: def $x8 killed $w8
203+
; CHECK-CPA-O0-NEXT: subs x8, x8, x9, lsl #8
204+
; CHECK-CPA-O0-NEXT: adrp x9, array
205+
; CHECK-CPA-O0-NEXT: add x9, x9, :lo12:array
212206
; CHECK-CPA-O0-NEXT: addpt x8, x9, x8
213-
; CHECK-CPA-O0-NEXT: // implicit-def: $x10
214-
; CHECK-CPA-O0-NEXT: mov w10, w0
215-
; CHECK-CPA-O0-NEXT: sbfiz x10, x10, #8, #32
216-
; CHECK-CPA-O0-NEXT: subpt x8, x8, x10
217207
; CHECK-CPA-O0-NEXT: ldr q0, [x9, #32]
218208
; CHECK-CPA-O0-NEXT: str q0, [x8]
219209
; CHECK-CPA-O0-NEXT: ret
220210
;
221211
; CHECK-CPA-O3-LABEL: subpt1:
222212
; CHECK-CPA-O3: // %bb.0: // %entry
223213
; CHECK-CPA-O3-NEXT: // kill: def $w0 killed $w0 def $x0
224-
; CHECK-CPA-O3-NEXT: adrp x8, array
225-
; CHECK-CPA-O3-NEXT: add x8, x8, :lo12:array
214+
; CHECK-CPA-O3-NEXT: sxtw x8, w0
226215
; CHECK-CPA-O3-NEXT: mov w9, #96 // =0x60
227-
; CHECK-CPA-O3-NEXT: sbfiz x10, x0, #8, #32
228-
; CHECK-CPA-O3-NEXT: addpt x9, x8, x9
229-
; CHECK-CPA-O3-NEXT: ldr q0, [x8, #32]
230-
; CHECK-CPA-O3-NEXT: subpt x8, x9, x10
216+
; CHECK-CPA-O3-NEXT: sub x8, x9, x8, lsl #8
217+
; CHECK-CPA-O3-NEXT: adrp x9, array
218+
; CHECK-CPA-O3-NEXT: add x9, x9, :lo12:array
219+
; CHECK-CPA-O3-NEXT: ldr q0, [x9, #32]
220+
; CHECK-CPA-O3-NEXT: addpt x8, x9, x8
231221
; CHECK-CPA-O3-NEXT: str q0, [x8]
232222
; CHECK-CPA-O3-NEXT: ret
233223
;
@@ -264,28 +254,24 @@ entry:
264254
define void @subpt2(i32 %index, i32 %elem) {
265255
; CHECK-CPA-O0-LABEL: subpt2:
266256
; CHECK-CPA-O0: // %bb.0: // %entry
267-
; CHECK-CPA-O0-NEXT: mov x8, xzr
268-
; CHECK-CPA-O0-NEXT: subs x10, x8, w0, sxtw
269-
; CHECK-CPA-O0-NEXT: adrp x9, array
270-
; CHECK-CPA-O0-NEXT: add x9, x9, :lo12:array
271257
; CHECK-CPA-O0-NEXT: mov w8, #96 // =0x60
272258
; CHECK-CPA-O0-NEXT: // kill: def $x8 killed $w8
259+
; CHECK-CPA-O0-NEXT: subs x8, x8, w0, sxtw #4
260+
; CHECK-CPA-O0-NEXT: adrp x9, array
261+
; CHECK-CPA-O0-NEXT: add x9, x9, :lo12:array
273262
; CHECK-CPA-O0-NEXT: addpt x8, x9, x8
274-
; CHECK-CPA-O0-NEXT: addpt x8, x8, x10, lsl #4
275263
; CHECK-CPA-O0-NEXT: ldr q0, [x9, #32]
276264
; CHECK-CPA-O0-NEXT: str q0, [x8]
277265
; CHECK-CPA-O0-NEXT: ret
278266
;
279267
; CHECK-CPA-O3-LABEL: subpt2:
280268
; CHECK-CPA-O3: // %bb.0: // %entry
281-
; CHECK-CPA-O3-NEXT: mov x8, xzr
282-
; CHECK-CPA-O3-NEXT: mov w9, #96 // =0x60
283-
; CHECK-CPA-O3-NEXT: adrp x10, array
284-
; CHECK-CPA-O3-NEXT: add x10, x10, :lo12:array
285-
; CHECK-CPA-O3-NEXT: sub x8, x8, w0, sxtw
286-
; CHECK-CPA-O3-NEXT: addpt x9, x10, x9
287-
; CHECK-CPA-O3-NEXT: ldr q0, [x10, #32]
288-
; CHECK-CPA-O3-NEXT: addpt x8, x9, x8, lsl #4
269+
; CHECK-CPA-O3-NEXT: mov w8, #96 // =0x60
270+
; CHECK-CPA-O3-NEXT: adrp x9, array
271+
; CHECK-CPA-O3-NEXT: add x9, x9, :lo12:array
272+
; CHECK-CPA-O3-NEXT: sub x8, x8, w0, sxtw #4
273+
; CHECK-CPA-O3-NEXT: ldr q0, [x9, #32]
274+
; CHECK-CPA-O3-NEXT: addpt x8, x9, x8
289275
; CHECK-CPA-O3-NEXT: str q0, [x8]
290276
; CHECK-CPA-O3-NEXT: ret
291277
;
@@ -670,14 +656,13 @@ define hidden void @multidim() {
670656
; CHECK-CPA-O0-NEXT: .cfi_offset w30, -16
671657
; CHECK-CPA-O0-NEXT: adrp x8, b
672658
; CHECK-CPA-O0-NEXT: ldrh w9, [x8, :lo12:b]
659+
; CHECK-CPA-O0-NEXT: // implicit-def: $x8
673660
; CHECK-CPA-O0-NEXT: mov w8, w9
674-
; CHECK-CPA-O0-NEXT: mov w10, w8
661+
; CHECK-CPA-O0-NEXT: ubfiz x8, x8, #1, #32
662+
; CHECK-CPA-O0-NEXT: add x10, x8, #2
675663
; CHECK-CPA-O0-NEXT: adrp x8, a
676664
; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:a
677-
; CHECK-CPA-O0-NEXT: mov w11, #2 // =0x2
678-
; CHECK-CPA-O0-NEXT: // kill: def $x11 killed $w11
679-
; CHECK-CPA-O0-NEXT: addpt x8, x8, x11
680-
; CHECK-CPA-O0-NEXT: addpt x8, x8, x10, lsl #1
665+
; CHECK-CPA-O0-NEXT: addpt x8, x8, x10
681666
; CHECK-CPA-O0-NEXT: add w9, w9, #1
682667
; CHECK-CPA-O0-NEXT: mov w9, w9
683668
; CHECK-CPA-O0-NEXT: // kill: def $x9 killed $w9
@@ -697,13 +682,13 @@ define hidden void @multidim() {
697682
; CHECK-CPA-O3-LABEL: multidim:
698683
; CHECK-CPA-O3: // %bb.0: // %entry
699684
; CHECK-CPA-O3-NEXT: adrp x8, b
700-
; CHECK-CPA-O3-NEXT: mov w9, #2 // =0x2
701685
; CHECK-CPA-O3-NEXT: adrp x10, a
702686
; CHECK-CPA-O3-NEXT: add x10, x10, :lo12:a
703687
; CHECK-CPA-O3-NEXT: ldrh w8, [x8, :lo12:b]
704-
; CHECK-CPA-O3-NEXT: addpt x9, x10, x9
705-
; CHECK-CPA-O3-NEXT: addpt x9, x9, x8, lsl #1
688+
; CHECK-CPA-O3-NEXT: lsl x9, x8, #1
706689
; CHECK-CPA-O3-NEXT: add x8, x8, #1
690+
; CHECK-CPA-O3-NEXT: add x9, x9, #2
691+
; CHECK-CPA-O3-NEXT: addpt x9, x10, x9
707692
; CHECK-CPA-O3-NEXT: addpt x8, x9, x8
708693
; CHECK-CPA-O3-NEXT: ldrb w8, [x8]
709694
; CHECK-CPA-O3-NEXT: cbz w8, .LBB14_2

0 commit comments

Comments
 (0)