Skip to content

Commit 195a7af

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Narrow 128-bit regs to 64-bit regs in emitTestBit
When we have a 128-bit register, emitTestBit would incorrectly narrow to 32 bits always. If the bit number was > 32, then we would need a TB(N)ZX. This would cause a crash, as we'd have the wrong register class. (PR48379) This generalizes `narrowExtReg` into `moveScalarRegClass`. This also allows us to remove `widenGPRBankRegIfNeeded` entirely, since `selectCopy` correctly handles SUBREG_TO_REG etc. This does create some codegen changes (since `selectCopy` uses the `all` regclass variants). However, I think that these will likely be optimized away, and we can always improve the `selectCopy` code. It looks like we should revisit `selectCopy` at this point, and possibly refactor it into at least one `emit` function. Differential Revision: https://reviews.llvm.org/D92707
1 parent 2656885 commit 195a7af

File tree

4 files changed

+67
-77
lines changed

4 files changed

+67
-77
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 26 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -356,14 +356,15 @@ class AArch64InstructionSelector : public InstructionSelector {
356356
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
357357
bool IsLoadStore = false) const;
358358

359-
/// Instructions that accept extend modifiers like UXTW expect the register
360-
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
361-
/// subregister copy if necessary. Return either ExtReg, or the result of the
362-
/// new copy.
363-
Register narrowExtendRegIfNeeded(Register ExtReg,
364-
MachineIRBuilder &MIB) const;
365-
Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
366-
MachineIRBuilder &MIB) const;
359+
/// Move \p Reg to \p RC if \p Reg is not already on \p RC.
360+
///
361+
/// \returns Either \p Reg if no change was necessary, or the new register
362+
/// created by moving \p Reg.
363+
///
364+
/// Note: This uses emitCopy right now.
365+
Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
366+
MachineIRBuilder &MIB) const;
367+
367368
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
368369

369370
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1353,10 +1354,10 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
13531354
// TBNZW work.
13541355
bool UseWReg = Bit < 32;
13551356
unsigned NecessarySize = UseWReg ? 32 : 64;
1356-
if (Size < NecessarySize)
1357-
TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
1358-
else if (Size > NecessarySize)
1359-
TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
1357+
if (Size != NecessarySize)
1358+
TestReg = moveScalarRegClass(
1359+
TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1360+
MIB);
13601361

13611362
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
13621363
{AArch64::TBZW, AArch64::TBNZW}};
@@ -5152,7 +5153,7 @@ AArch64InstructionSelector::selectExtendedSHL(
51525153

51535154
// Need a 32-bit wide register here.
51545155
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5155-
OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
5156+
OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
51565157
}
51575158

51585159
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5372,8 +5373,8 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
53725373

53735374
// Need a 32-bit wide register.
53745375
MachineIRBuilder MIB(*PtrAdd);
5375-
Register ExtReg =
5376-
narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
5376+
Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5377+
AArch64::GPR32RegClass, MIB);
53775378
unsigned SignExtend = Ext == AArch64_AM::SXTW;
53785379

53795380
// Base is LHS, offset is ExtReg.
@@ -5647,67 +5648,21 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
56475648
}
56485649
}
56495650

5650-
Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
5651-
Register ExtReg, MachineIRBuilder &MIB) const {
5651+
Register AArch64InstructionSelector::moveScalarRegClass(
5652+
Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
56525653
MachineRegisterInfo &MRI = *MIB.getMRI();
5653-
if (MRI.getType(ExtReg).getSizeInBits() == 32)
5654-
return ExtReg;
5655-
5656-
// Insert a copy to move ExtReg to GPR32.
5657-
Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5658-
auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
5654+
auto Ty = MRI.getType(Reg);
5655+
assert(!Ty.isVector() && "Expected scalars only!");
5656+
if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
5657+
return Reg;
56595658

5660-
// Select the copy into a subregister copy.
5659+
// Create a copy and immediately select it.
5660+
// FIXME: We should have an emitCopy function?
5661+
auto Copy = MIB.buildCopy({&RC}, {Reg});
56615662
selectCopy(*Copy, TII, MRI, TRI, RBI);
56625663
return Copy.getReg(0);
56635664
}
56645665

5665-
Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
5666-
Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
5667-
assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
5668-
MachineRegisterInfo &MRI = *MIB.getMRI();
5669-
unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
5670-
assert(WideSize >= NarrowSize &&
5671-
"WideSize cannot be smaller than NarrowSize!");
5672-
5673-
// If the sizes match, just return the register.
5674-
//
5675-
// If NarrowSize is an s1, then we can select it to any size, so we'll treat
5676-
// it as a don't care.
5677-
if (NarrowSize == WideSize || NarrowSize == 1)
5678-
return Reg;
5679-
5680-
// Now check the register classes.
5681-
const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
5682-
const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
5683-
const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
5684-
assert(OrigRC && "Could not determine narrow RC?");
5685-
assert(WideRC && "Could not determine wide RC?");
5686-
5687-
// If the sizes differ, but the register classes are the same, there is no
5688-
// need to insert a SUBREG_TO_REG.
5689-
//
5690-
// For example, an s8 that's supposed to be a GPR will be selected to either
5691-
// a GPR32 or a GPR64 register. Note that this assumes that the s8 will
5692-
// always end up on a GPR32.
5693-
if (OrigRC == WideRC)
5694-
return Reg;
5695-
5696-
// We have two different register classes. Insert a SUBREG_TO_REG.
5697-
unsigned SubReg = 0;
5698-
getSubRegForClass(OrigRC, TRI, SubReg);
5699-
assert(SubReg && "Couldn't determine subregister?");
5700-
5701-
// Build the SUBREG_TO_REG and return the new, widened register.
5702-
auto SubRegToReg =
5703-
MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
5704-
.addImm(0)
5705-
.addUse(Reg)
5706-
.addImm(SubReg);
5707-
constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
5708-
return SubRegToReg.getReg(0);
5709-
}
5710-
57115666
/// Select an "extended register" operand. This operand folds in an extend
57125667
/// followed by an optional left shift.
57135668
InstructionSelector::ComplexRendererFns
@@ -5768,7 +5723,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
57685723
// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
57695724
// copy.
57705725
MachineIRBuilder MIB(*RootDef);
5771-
ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
5726+
ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
57725727

57735728
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
57745729
[=](MachineInstrBuilder &MIB) {

llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,9 @@ body: |
7878
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
7979
; CHECK: liveins: $h0
8080
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
81-
; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
82-
; CHECK: TBNZW %copy, 3, %bb.1
81+
; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
82+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
83+
; CHECK: TBNZW [[COPY]], 3, %bb.1
8384
; CHECK: B %bb.0
8485
; CHECK: bb.1:
8586
; CHECK: RET_ReallyLR

llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,35 @@ body: |
3434
bb.1:
3535
RET_ReallyLR
3636
...
37+
---
38+
name: no_trunc
39+
alignment: 4
40+
legalized: true
41+
regBankSelected: true
42+
tracksRegLiveness: true
43+
body: |
44+
; CHECK-LABEL: name: no_trunc
45+
; CHECK: bb.0:
46+
; CHECK: successors: %bb.1(0x80000000)
47+
; CHECK: liveins: $x0
48+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
49+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
50+
; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[LDRQui]].dsub
51+
; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
52+
; CHECK: TBNZX [[COPY2]], 33, %bb.1
53+
; CHECK: bb.1:
54+
; CHECK: RET_ReallyLR
55+
bb.0:
56+
liveins: $x0
57+
%1:gpr(p0) = COPY $x0
58+
%3:gpr(s64) = G_CONSTANT i64 8589934592
59+
%5:gpr(s64) = G_CONSTANT i64 0
60+
%0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
61+
%2:fpr(s64) = G_TRUNC %0:fpr(s128)
62+
%8:gpr(s64) = COPY %2:fpr(s64)
63+
%4:gpr(s64) = G_AND %8:gpr, %3:gpr
64+
%7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
65+
%6:gpr(s1) = G_TRUNC %7:gpr(s32)
66+
G_BRCOND %6:gpr(s1), %bb.1
67+
bb.1:
68+
RET_ReallyLR

llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,9 @@ body: |
106106
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
107107
; CHECK: liveins: $w0
108108
; CHECK: %reg:gpr32all = COPY $w0
109-
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
110-
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
109+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
110+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
111+
; CHECK: TBZX [[COPY]], 33, %bb.1
111112
; CHECK: B %bb.0
112113
; CHECK: bb.1:
113114
; CHECK: RET_ReallyLR
@@ -140,8 +141,9 @@ body: |
140141
; CHECK: bb.0:
141142
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
142143
; CHECK: %reg:gpr32 = IMPLICIT_DEF
143-
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
144-
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
144+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
145+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
146+
; CHECK: TBZX [[COPY]], 33, %bb.1
145147
; CHECK: B %bb.0
146148
; CHECK: bb.1:
147149
; CHECK: RET_ReallyLR

0 commit comments

Comments
 (0)