Skip to content

Commit 45e1a22

Browse files
committed
GlobalISel: Make known bits/alignment API more consistent
Computing just the alignment makes sense without caring about the general known bits, such as for non-integral pointers. Separate the two and start calling the TargetLowering hook implementation for frame indexes, which improves the AMDGPU matching for stack addressing modes. Also introduce a new hook for returning the known alignment of target instructions; for AMDGPU, it would be useful to report the known alignment implied by certain intrinsic calls. Also stop using MaybeAlign.
1 parent 6c570f7 commit 45e1a22

File tree

6 files changed

+41
-51
lines changed

6 files changed

+41
-51
lines changed

llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,14 @@ class GISelKnownBits : public GISelChangeObserver {
6969
/// predicate to simplify operations downstream.
7070
bool signBitIsZero(Register Op);
7171

72-
// FIXME: Is this the right place for G_FRAME_INDEX? Should it be in
73-
// TargetLowering?
74-
void computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
75-
const APInt &DemandedElts,
76-
unsigned Depth = 0);
77-
static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset,
78-
const MachineFunction &MF);
7972
static void computeKnownBitsForAlignment(KnownBits &Known,
80-
MaybeAlign Alignment);
73+
Align Alignment) {
74+
// The low bits are known zero if the pointer is aligned.
75+
Known.Zero.setLowBits(Log2(Alignment));
76+
}
8177

82-
// Try to infer alignment for MI.
83-
static MaybeAlign inferPtrAlignment(const MachineInstr &MI);
78+
/// \return The known alignment for the pointer-like value \p R.
79+
Align computeKnownAlignment(Register R, unsigned Depth = 0);
8480

8581
// Observer API. No-op for non-caching implementation.
8682
void erasingInstr(MachineInstr &MI) override{};

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3348,6 +3348,15 @@ class TargetLowering : public TargetLoweringBase {
33483348
const MachineRegisterInfo &MRI,
33493349
unsigned Depth = 0) const;
33503350

3351+
/// Determine the known alignment for the pointer value \p R. This can
3352+
/// typically be inferred from the number of low known 0 bits. However, for a
3353+
/// pointer with a non-integral address space, the alignment value may be
3354+
/// independent from the known low bits.
3355+
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
3356+
Register R,
3357+
const MachineRegisterInfo &MRI,
3358+
unsigned Depth = 0) const;
3359+
33513360
/// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
33523361
/// Default implementation computes low bits based on alignment
33533362
/// information. This should preserve known bits passed into it.

llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,36 +31,23 @@ GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
3131
: MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
3232
DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}
3333

34-
Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset,
35-
const MachineFunction &MF) {
36-
const MachineFrameInfo &MFI = MF.getFrameInfo();
37-
return commonAlignment(MFI.getObjectAlign(FrameIdx), Offset);
38-
// TODO: How to handle cases with Base + Offset?
39-
}
40-
41-
MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) {
42-
if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
43-
int FrameIdx = MI.getOperand(1).getIndex();
44-
return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF());
34+
Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
35+
const MachineInstr *MI = MRI.getVRegDef(R);
36+
switch (MI->getOpcode()) {
37+
case TargetOpcode::G_FRAME_INDEX: {
38+
int FrameIdx = MI->getOperand(1).getIndex();
39+
return MF.getFrameInfo().getObjectAlign(FrameIdx);
40+
}
41+
case TargetOpcode::G_INTRINSIC:
42+
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
43+
default:
44+
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
4545
}
46-
return None;
47-
}
48-
49-
void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
50-
const APInt &DemandedElts,
51-
unsigned Depth) {
52-
const MachineInstr &MI = *MRI.getVRegDef(R);
53-
computeKnownBitsForAlignment(Known, inferPtrAlignment(MI));
54-
}
55-
56-
void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known,
57-
MaybeAlign Alignment) {
58-
if (Alignment)
59-
// The low bits are known zero if the pointer is aligned.
60-
Known.Zero.setLowBits(Log2(*Alignment));
6146
}
6247

6348
KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
49+
assert(MI.getNumExplicitDefs() == 1 &&
50+
"expected single return generic instruction");
6451
return getKnownBits(MI.getOperand(0).getReg());
6552
}
6653

@@ -215,7 +202,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
215202
break;
216203
}
217204
case TargetOpcode::G_FRAME_INDEX: {
218-
computeKnownBitsForFrameIndex(R, Known, DemandedElts);
205+
int FrameIdx = MI.getOperand(1).getIndex();
206+
TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
219207
break;
220208
}
221209
case TargetOpcode::G_SUB: {

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,12 @@ void TargetLowering::computeKnownBitsForFrameIndex(
28322832
Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
28332833
}
28342834

2835+
Align TargetLowering::computeKnownAlignForTargetInstr(
2836+
GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
2837+
unsigned Depth) const {
2838+
return Align(1);
2839+
}
2840+
28352841
/// This method can be implemented by targets that want to expose additional
28362842
/// information about sign bits to the DAG Combiner.
28372843
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -793,10 +793,7 @@ body: |
793793
bb.0:
794794
795795
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095
796-
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
797-
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
798-
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
799-
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
796+
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
800797
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
801798
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
802799
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,8 @@ body: |
205205
bb.0:
206206
207207
; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
208-
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
209-
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
210-
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
211-
; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
212-
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
208+
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
209+
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
213210
; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
214211
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
215212
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
@@ -477,11 +474,8 @@ body: |
477474
478475
; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
479476
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
480-
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
481-
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
482-
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
483-
; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
484-
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
477+
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
478+
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
485479
; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
486480
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
487481
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

0 commit comments

Comments
 (0)