Skip to content

Commit 4506bf1

Browse files
committed
Address comments
1 parent ae4c9f4 commit 4506bf1

23 files changed

+8074
-8118
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4590,7 +4590,7 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
45904590
LoadVT = TLI.getTypeToTransformTo(*DAG.getContext(), LoadVT);
45914591
} while (!TLI.isTypeLegal(LoadVT));
45924592

4593-
const unsigned ShiftUnitInBits = LoadVT.getStoreSize() * 8;
4593+
const unsigned ShiftUnitInBits = LoadVT.getStoreSizeInBits();
45944594
assert(ShiftUnitInBits <= VT.getScalarSizeInBits());
45954595
assert(isPowerOf2_32(ShiftUnitInBits) &&
45964596
"Shifting unit is not a a power of two!");
@@ -4616,8 +4616,6 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
46164616
// Get a temporary stack slot 2x the width of our VT.
46174617
// FIXME: reuse stack slots?
46184618
Align StackAlign = DAG.getReducedAlign(StackSlotVT, /*UseABI=*/false);
4619-
assert(DAG.getReducedAlign(LoadVT, /*UseABI=*/false) <= StackAlign);
4620-
46214619
SDValue StackPtr =
46224620
DAG.CreateStackTemporary(StackSlotVT.getStoreSize(), StackAlign);
46234621
EVT PtrTy = StackPtr.getValueType();
@@ -4639,28 +4637,26 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
46394637
Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee);
46404638
}
46414639
// And spill it into the stack slot.
4642-
Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo);
4640+
Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackAlign);
46434641

46444642
// Now, compute the full-byte offset into stack slot from where we can load.
46454643
// We have shift amount, which is in bits. Offset should point to an aligned
46464644
// address.
46474645
SDNodeFlags Flags;
4648-
4649-
if (IsOneStepShift)
4650-
Flags.setExact(true);
4646+
Flags.setExact(IsOneStepShift);
46514647
SDValue SrlTmp = DAG.getNode(
46524648
ISD::SRL, dl, ShAmtVT, ShAmt,
46534649
DAG.getConstant(Log2_32(ShiftUnitInBits), dl, ShAmtVT), Flags);
4654-
SDValue OffsetInBits =
4650+
SDValue BitOffset =
46554651
DAG.getNode(ISD::SHL, dl, ShAmtVT, SrlTmp,
46564652
DAG.getConstant(Log2_32(ShiftUnitInBits), dl, ShAmtVT));
46574653

46584654
Flags.setExact(true);
4659-
SDValue Offset = DAG.getNode(ISD::SRL, dl, ShAmtVT, OffsetInBits,
4655+
SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, BitOffset,
46604656
DAG.getConstant(3, dl, ShAmtVT), Flags);
46614657
// And clamp it, because OOB load is an immediate UB,
46624658
// while shift overflow would have *just* been poison.
4663-
Offset = DAG.getNode(ISD::AND, dl, ShAmtVT, Offset,
4659+
ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset,
46644660
DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT));
46654661
// We have exactly two strategies on indexing into stack slot here:
46664662
// 1. upwards starting from the beginning of the slot
@@ -4677,18 +4673,18 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
46774673
} else {
46784674
AdjStackPtr = DAG.getMemBasePlusOffset(
46794675
StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl);
4680-
Offset = DAG.getNegative(Offset, dl, ShAmtVT);
4676+
ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT);
46814677
}
46824678

46834679
// Get the pointer somewhere into the stack slot from which we need to load.
4684-
Offset = DAG.getSExtOrTrunc(Offset, dl, PtrTy);
4685-
AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, Offset, dl);
4680+
ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy);
4681+
AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl);
46864682

46874683
// And load it! While the load is not legal, legalizing it is obvious.
46884684
SDValue Res =
46894685
DAG.getLoad(VT, dl, Ch, AdjStackPtr,
46904686
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
4691-
DAG.getReducedAlign(LoadVT, /*UseABI=*/false));
4687+
commonAlignment(StackAlign, LoadVT.getStoreSize()));
46924688

46934689
// If we may still have remaining bits to shift by, do so now.
46944690
if (!IsOneStepShift) {

llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -179,20 +179,21 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
179179
define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
180180
; ALL-LABEL: lshr_32bytes:
181181
; ALL: // %bb.0:
182-
; ALL-NEXT: ldr q0, [x0]
183-
; ALL-NEXT: ldp x8, x9, [x0, #16]
182+
; ALL-NEXT: sub sp, sp, #64
183+
; ALL-NEXT: ldp x9, x8, [x0, #16]
184+
; ALL-NEXT: movi v0.2d, #0000000000000000
184185
; ALL-NEXT: ldr x10, [x1]
185-
; ALL-NEXT: movi v1.2d, #0000000000000000
186-
; ALL-NEXT: str q0, [sp, #-64]!
187-
; ALL-NEXT: stp x8, x9, [sp, #16]
186+
; ALL-NEXT: ldr q1, [x0]
187+
; ALL-NEXT: stp x9, x8, [sp, #16]
188188
; ALL-NEXT: mov x8, sp
189189
; ALL-NEXT: and x9, x10, #0x18
190-
; ALL-NEXT: stp q1, q1, [sp, #32]
190+
; ALL-NEXT: str q1, [sp]
191191
; ALL-NEXT: add x8, x8, x9
192192
; ALL-NEXT: lsl x9, x10, #3
193+
; ALL-NEXT: stp q0, q0, [sp, #32]
193194
; ALL-NEXT: ldp x11, x10, [x8, #16]
194-
; ALL-NEXT: ldp x8, x12, [x8]
195195
; ALL-NEXT: mvn w13, w9
196+
; ALL-NEXT: ldp x8, x12, [x8]
196197
; ALL-NEXT: and x9, x9, #0x38
197198
; ALL-NEXT: lsl x14, x10, #1
198199
; ALL-NEXT: lsl x15, x11, #1
@@ -251,22 +252,22 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
251252
; ALL-LABEL: shl_32bytes:
252253
; ALL: // %bb.0:
253254
; ALL-NEXT: sub sp, sp, #64
255+
; ALL-NEXT: ldp x9, x8, [x0, #16]
254256
; ALL-NEXT: movi v0.2d, #0000000000000000
255-
; ALL-NEXT: ldp x8, x9, [x0, #16]
256257
; ALL-NEXT: ldr x10, [x1]
257258
; ALL-NEXT: ldr q1, [x0]
258-
; ALL-NEXT: mov x11, sp
259-
; ALL-NEXT: add x11, x11, #32
260-
; ALL-NEXT: and x12, x10, #0x18
261-
; ALL-NEXT: stp x8, x9, [sp, #48]
259+
; ALL-NEXT: stp x9, x8, [sp, #48]
260+
; ALL-NEXT: mov x8, sp
261+
; ALL-NEXT: and x9, x10, #0x18
262+
; ALL-NEXT: add x8, x8, #32
263+
; ALL-NEXT: stp q0, q0, [sp]
264+
; ALL-NEXT: str q1, [sp, #32]
265+
; ALL-NEXT: sub x8, x8, x9
262266
; ALL-NEXT: lsl x9, x10, #3
263-
; ALL-NEXT: stp q0, q1, [sp, #16]
264-
; ALL-NEXT: sub x8, x11, x12
265-
; ALL-NEXT: str q0, [sp]
266-
; ALL-NEXT: mvn w13, w9
267-
; ALL-NEXT: and x9, x9, #0x38
268267
; ALL-NEXT: ldp x10, x11, [x8]
269268
; ALL-NEXT: ldp x12, x8, [x8, #16]
269+
; ALL-NEXT: mvn w13, w9
270+
; ALL-NEXT: and x9, x9, #0x38
270271
; ALL-NEXT: lsr x14, x10, #1
271272
; ALL-NEXT: lsr x15, x11, #1
272273
; ALL-NEXT: lsl x11, x11, x9
@@ -324,30 +325,31 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
324325
define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
325326
; ALL-LABEL: ashr_32bytes:
326327
; ALL: // %bb.0:
327-
; ALL-NEXT: ldr q0, [x0]
328+
; ALL-NEXT: sub sp, sp, #64
328329
; ALL-NEXT: ldp x9, x8, [x0, #16]
329330
; ALL-NEXT: ldr x10, [x1]
330-
; ALL-NEXT: str q0, [sp, #-64]!
331-
; ALL-NEXT: asr x11, x8, #63
331+
; ALL-NEXT: ldr q0, [x0]
332+
; ALL-NEXT: and x11, x10, #0x18
332333
; ALL-NEXT: stp x9, x8, [sp, #16]
333-
; ALL-NEXT: mov x8, sp
334-
; ALL-NEXT: and x9, x10, #0x18
335-
; ALL-NEXT: stp x11, x11, [sp, #48]
336-
; ALL-NEXT: add x8, x8, x9
337-
; ALL-NEXT: lsl x9, x10, #3
338-
; ALL-NEXT: stp x11, x11, [sp, #32]
339-
; ALL-NEXT: ldp x11, x10, [x8, #16]
340-
; ALL-NEXT: mvn w13, w9
341-
; ALL-NEXT: ldp x8, x12, [x8]
342-
; ALL-NEXT: and x9, x9, #0x38
334+
; ALL-NEXT: asr x8, x8, #63
335+
; ALL-NEXT: mov x9, sp
336+
; ALL-NEXT: str q0, [sp]
337+
; ALL-NEXT: add x9, x9, x11
338+
; ALL-NEXT: stp x8, x8, [sp, #48]
339+
; ALL-NEXT: stp x8, x8, [sp, #32]
340+
; ALL-NEXT: lsl x8, x10, #3
341+
; ALL-NEXT: ldp x11, x10, [x9, #16]
342+
; ALL-NEXT: ldp x9, x12, [x9]
343+
; ALL-NEXT: mvn w13, w8
344+
; ALL-NEXT: and x8, x8, #0x38
343345
; ALL-NEXT: lsl x14, x10, #1
344346
; ALL-NEXT: lsl x15, x11, #1
345-
; ALL-NEXT: lsr x11, x11, x9
347+
; ALL-NEXT: lsr x11, x11, x8
346348
; ALL-NEXT: lsl x16, x12, #1
347-
; ALL-NEXT: asr x10, x10, x9
348-
; ALL-NEXT: lsr x12, x12, x9
349+
; ALL-NEXT: asr x10, x10, x8
350+
; ALL-NEXT: lsr x12, x12, x8
349351
; ALL-NEXT: lsl x14, x14, x13
350-
; ALL-NEXT: lsr x8, x8, x9
352+
; ALL-NEXT: lsr x8, x9, x8
351353
; ALL-NEXT: lsl x9, x16, x13
352354
; ALL-NEXT: lsl x13, x15, x13
353355
; ALL-NEXT: orr x11, x14, x11

llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll

Lines changed: 60 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -154,38 +154,39 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
154154
define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
155155
; ALL-LABEL: lshr_32bytes:
156156
; ALL: // %bb.0:
157-
; ALL-NEXT: ldr q0, [x0]
158-
; ALL-NEXT: ldp x8, x10, [x0, #16]
159-
; ALL-NEXT: ldr x9, [x1]
160-
; ALL-NEXT: movi v1.2d, #0000000000000000
161-
; ALL-NEXT: str q0, [sp, #-64]!
162-
; ALL-NEXT: lsr x11, x9, #3
163-
; ALL-NEXT: stp x8, x10, [sp, #16]
164-
; ALL-NEXT: mov x8, sp
165-
; ALL-NEXT: stp q1, q1, [sp, #32]
166-
; ALL-NEXT: and x12, x9, #0x3f
167-
; ALL-NEXT: and x10, x11, #0x18
157+
; ALL-NEXT: sub sp, sp, #64
158+
; ALL-NEXT: ldp x9, x8, [x0, #16]
159+
; ALL-NEXT: movi v0.2d, #0000000000000000
160+
; ALL-NEXT: ldr x10, [x1]
161+
; ALL-NEXT: ldr q1, [x0]
162+
; ALL-NEXT: stp x9, x8, [sp, #16]
163+
; ALL-NEXT: lsr x8, x10, #3
164+
; ALL-NEXT: mov x9, sp
165+
; ALL-NEXT: str q1, [sp]
166+
; ALL-NEXT: and x12, x10, #0x3f
167+
; ALL-NEXT: and x8, x8, #0x18
168+
; ALL-NEXT: stp q0, q0, [sp, #32]
168169
; ALL-NEXT: eor x12, x12, #0x3f
169-
; ALL-NEXT: add x8, x8, x10
170+
; ALL-NEXT: add x8, x9, x8
170171
; ALL-NEXT: ldp x13, x11, [x8]
171-
; ALL-NEXT: ldr x10, [x8, #24]
172+
; ALL-NEXT: ldr x9, [x8, #24]
172173
; ALL-NEXT: ldr x8, [x8, #16]
173-
; ALL-NEXT: lsl x14, x10, #1
174-
; ALL-NEXT: lsr x10, x10, x9
174+
; ALL-NEXT: lsl x14, x9, #1
175+
; ALL-NEXT: lsr x9, x9, x10
175176
; ALL-NEXT: lsl x15, x11, #1
176-
; ALL-NEXT: lsr x11, x11, x9
177-
; ALL-NEXT: lsr x13, x13, x9
177+
; ALL-NEXT: lsr x11, x11, x10
178+
; ALL-NEXT: lsr x13, x13, x10
178179
; ALL-NEXT: lsl x14, x14, x12
179180
; ALL-NEXT: lsl x12, x15, x12
180181
; ALL-NEXT: lsl x15, x8, #1
181-
; ALL-NEXT: lsr x8, x8, x9
182-
; ALL-NEXT: mvn w9, w9
183-
; ALL-NEXT: lsl x9, x15, x9
182+
; ALL-NEXT: lsr x8, x8, x10
183+
; ALL-NEXT: mvn w10, w10
184+
; ALL-NEXT: lsl x10, x15, x10
184185
; ALL-NEXT: orr x8, x14, x8
185-
; ALL-NEXT: stp x8, x10, [x2, #16]
186-
; ALL-NEXT: orr x10, x12, x13
187-
; ALL-NEXT: orr x8, x11, x9
188-
; ALL-NEXT: stp x10, x8, [x2]
186+
; ALL-NEXT: stp x8, x9, [x2, #16]
187+
; ALL-NEXT: orr x9, x12, x13
188+
; ALL-NEXT: orr x8, x11, x10
189+
; ALL-NEXT: stp x9, x8, [x2]
189190
; ALL-NEXT: add sp, sp, #64
190191
; ALL-NEXT: ret
191192
%src = load i256, ptr %src.ptr, align 1
@@ -198,39 +199,39 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
198199
; ALL-LABEL: shl_32bytes:
199200
; ALL: // %bb.0:
200201
; ALL-NEXT: sub sp, sp, #64
201-
; ALL-NEXT: ldp x10, x8, [x0, #16]
202-
; ALL-NEXT: movi v1.2d, #0000000000000000
203-
; ALL-NEXT: ldr x9, [x1]
204-
; ALL-NEXT: ldr q0, [x0]
205-
; ALL-NEXT: lsr x11, x9, #3
206-
; ALL-NEXT: stp x10, x8, [sp, #48]
207-
; ALL-NEXT: mov x8, sp
208-
; ALL-NEXT: add x8, x8, #32
209-
; ALL-NEXT: stp q1, q0, [sp, #16]
210-
; ALL-NEXT: and x12, x9, #0x3f
211-
; ALL-NEXT: and x10, x11, #0x18
212-
; ALL-NEXT: str q1, [sp]
202+
; ALL-NEXT: ldp x9, x8, [x0, #16]
203+
; ALL-NEXT: movi v0.2d, #0000000000000000
204+
; ALL-NEXT: ldr x10, [x1]
205+
; ALL-NEXT: ldr q1, [x0]
206+
; ALL-NEXT: stp x9, x8, [sp, #48]
207+
; ALL-NEXT: lsr x8, x10, #3
208+
; ALL-NEXT: mov x9, sp
209+
; ALL-NEXT: add x9, x9, #32
210+
; ALL-NEXT: stp q0, q1, [sp, #16]
211+
; ALL-NEXT: and x12, x10, #0x3f
212+
; ALL-NEXT: and x8, x8, #0x18
213+
; ALL-NEXT: str q0, [sp]
213214
; ALL-NEXT: eor x12, x12, #0x3f
214-
; ALL-NEXT: sub x8, x8, x10
215+
; ALL-NEXT: sub x8, x9, x8
215216
; ALL-NEXT: ldp x11, x13, [x8, #16]
216-
; ALL-NEXT: ldr x10, [x8]
217+
; ALL-NEXT: ldr x9, [x8]
217218
; ALL-NEXT: ldr x8, [x8, #8]
218-
; ALL-NEXT: lsr x15, x10, #1
219-
; ALL-NEXT: lsl x10, x10, x9
219+
; ALL-NEXT: lsr x15, x9, #1
220+
; ALL-NEXT: lsl x9, x9, x10
220221
; ALL-NEXT: lsr x14, x11, #1
221-
; ALL-NEXT: lsl x11, x11, x9
222-
; ALL-NEXT: lsl x13, x13, x9
222+
; ALL-NEXT: lsl x11, x11, x10
223+
; ALL-NEXT: lsl x13, x13, x10
223224
; ALL-NEXT: lsr x14, x14, x12
224225
; ALL-NEXT: lsr x12, x15, x12
225226
; ALL-NEXT: lsr x15, x8, #1
226-
; ALL-NEXT: lsl x8, x8, x9
227-
; ALL-NEXT: mvn w9, w9
228-
; ALL-NEXT: lsr x9, x15, x9
227+
; ALL-NEXT: lsl x8, x8, x10
228+
; ALL-NEXT: mvn w10, w10
229+
; ALL-NEXT: lsr x10, x15, x10
229230
; ALL-NEXT: orr x8, x8, x12
230-
; ALL-NEXT: stp x10, x8, [x2]
231-
; ALL-NEXT: orr x10, x13, x14
232-
; ALL-NEXT: orr x8, x11, x9
233-
; ALL-NEXT: stp x8, x10, [x2, #16]
231+
; ALL-NEXT: stp x9, x8, [x2]
232+
; ALL-NEXT: orr x9, x13, x14
233+
; ALL-NEXT: orr x8, x11, x10
234+
; ALL-NEXT: stp x8, x9, [x2, #16]
234235
; ALL-NEXT: add sp, sp, #64
235236
; ALL-NEXT: ret
236237
%src = load i256, ptr %src.ptr, align 1
@@ -242,20 +243,21 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
242243
define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
243244
; ALL-LABEL: ashr_32bytes:
244245
; ALL: // %bb.0:
245-
; ALL-NEXT: ldr q0, [x0]
246+
; ALL-NEXT: sub sp, sp, #64
246247
; ALL-NEXT: ldp x9, x8, [x0, #16]
248+
; ALL-NEXT: mov x11, sp
247249
; ALL-NEXT: ldr x10, [x1]
248-
; ALL-NEXT: str q0, [sp, #-64]!
249-
; ALL-NEXT: asr x11, x8, #63
250+
; ALL-NEXT: ldr q0, [x0]
250251
; ALL-NEXT: stp x9, x8, [sp, #16]
251-
; ALL-NEXT: lsr x8, x10, #3
252-
; ALL-NEXT: mov x9, sp
252+
; ALL-NEXT: lsr x9, x10, #3
253+
; ALL-NEXT: asr x8, x8, #63
254+
; ALL-NEXT: str q0, [sp]
253255
; ALL-NEXT: and x12, x10, #0x3f
254-
; ALL-NEXT: and x8, x8, #0x18
255-
; ALL-NEXT: stp x11, x11, [sp, #48]
256+
; ALL-NEXT: and x9, x9, #0x18
257+
; ALL-NEXT: stp x8, x8, [sp, #48]
256258
; ALL-NEXT: eor x12, x12, #0x3f
257-
; ALL-NEXT: stp x11, x11, [sp, #32]
258-
; ALL-NEXT: add x8, x9, x8
259+
; ALL-NEXT: stp x8, x8, [sp, #32]
260+
; ALL-NEXT: add x8, x11, x9
259261
; ALL-NEXT: ldp x13, x11, [x8]
260262
; ALL-NEXT: ldr x9, [x8, #24]
261263
; ALL-NEXT: ldr x8, [x8, #16]

0 commit comments

Comments
 (0)